From 7c88f2bf7840c0ea67ac2de2e293a653f62ea134 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 20 Jul 2017 07:05:28 +0200 Subject: [PATCH 001/279] tile: defconfig: Cleanup from old Kconfig options Remove old, dead Kconfig options (in order appearing in this commit): - CRYPTO_ZLIB: commit 110492183c4b ("crypto: compress - remove unused pcomp interface"); - IP_NF_TARGET_ULOG: commit d4da843e6fad ("netfilter: kill remnants of ulog targets"); Signed-off-by: Krzysztof Kozlowski Signed-off-by: Chris Metcalf --- arch/tile/configs/tilegx_defconfig | 1 - arch/tile/configs/tilepro_defconfig | 2 -- 2 files changed, 3 deletions(-) diff --git a/arch/tile/configs/tilegx_defconfig b/arch/tile/configs/tilegx_defconfig index 0d925fa0f0c1..9f94435cc44f 100644 --- a/arch/tile/configs/tilegx_defconfig +++ b/arch/tile/configs/tilegx_defconfig @@ -409,5 +409,4 @@ CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_ZLIB=m CONFIG_CRYPTO_LZO=m diff --git a/arch/tile/configs/tilepro_defconfig b/arch/tile/configs/tilepro_defconfig index 149d8e8eacb8..1c5bd4f8ffca 100644 --- a/arch/tile/configs/tilepro_defconfig +++ b/arch/tile/configs/tilepro_defconfig @@ -189,7 +189,6 @@ CONFIG_IP_NF_MATCH_ECN=m CONFIG_IP_NF_MATCH_TTL=m CONFIG_IP_NF_FILTER=y CONFIG_IP_NF_TARGET_REJECT=y -CONFIG_IP_NF_TARGET_ULOG=m CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_ECN=m CONFIG_IP_NF_TARGET_TTL=m @@ -521,7 +520,6 @@ CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m -CONFIG_CRYPTO_ZLIB=m CONFIG_CRYPTO_LZO=m CONFIG_CRC_CCITT=m CONFIG_CRC7=m From 637f23abca87d26e091e0d6647ec878d97d2c6cd Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 22 Jul 2017 10:33:02 +0300 Subject: [PATCH 002/279] tile: array underflow in setup_maxnodemem() My static checker correctly complains that we should have a lower bound on "node" to prevent an array underflow. Fixes: 867e359b97c9 ("arch/tile: core support for Tilera 32-bit chips.") Signed-off-by: Dan Carpenter Signed-off-by: Chris Metcalf --- arch/tile/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index 443a70bccc1c..b1474e7d9afb 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -140,7 +140,7 @@ static int __init setup_maxnodemem(char *str) { char *endp; unsigned long long maxnodemem; - long node; + unsigned long node; node = str ? simple_strtoul(str, &endp, 0) : INT_MAX; if (node >= MAX_NUMNODES || *endp != ':') From 472b46c352c9ff0b6fa57dbf85d77c51901a3368 Mon Sep 17 00:00:00 2001 From: Mikko Rapeli Date: Sun, 6 Aug 2017 18:44:27 +0200 Subject: [PATCH 003/279] uapi linux/kfd_ioctl.h: only use __u32 and __u64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Include instead of which on Linux includes and on non-Linux platforms defines __u32 etc types. Fixes user space compilation errors like: linux/kfd_ioctl.h:33:2: error: unknown type name ‘uint32_t’ uint32_t major_version; /* from KFD */ ^~~~~~~~ Signed-off-by: Mikko Rapeli Acked-by: Arnd Bergmann Signed-off-by: Oded Gabbay --- include/uapi/linux/kfd_ioctl.h | 168 ++++++++++++++++----------------- 1 file changed, 84 insertions(+), 84 deletions(-) diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 7b4567bacfc2..26283fefdf5f 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -23,15 +23,15 @@ #ifndef KFD_IOCTL_H_INCLUDED #define KFD_IOCTL_H_INCLUDED -#include +#include #include #define KFD_IOCTL_MAJOR_VERSION 1 #define KFD_IOCTL_MINOR_VERSION 1 struct kfd_ioctl_get_version_args { - uint32_t major_version; /* from KFD */ - uint32_t minor_version; /* from KFD */ + __u32 major_version; /* from KFD */ + __u32 minor_version; /* from KFD */ }; /* For kfd_ioctl_create_queue_args.queue_type. */ @@ -43,36 +43,36 @@ struct kfd_ioctl_get_version_args { #define KFD_MAX_QUEUE_PRIORITY 15 struct kfd_ioctl_create_queue_args { - uint64_t ring_base_address; /* to KFD */ - uint64_t write_pointer_address; /* from KFD */ - uint64_t read_pointer_address; /* from KFD */ - uint64_t doorbell_offset; /* from KFD */ + __u64 ring_base_address; /* to KFD */ + __u64 write_pointer_address; /* from KFD */ + __u64 read_pointer_address; /* from KFD */ + __u64 doorbell_offset; /* from KFD */ - uint32_t ring_size; /* to KFD */ - uint32_t gpu_id; /* to KFD */ - uint32_t queue_type; /* to KFD */ - uint32_t queue_percentage; /* to KFD */ - uint32_t queue_priority; /* to KFD */ - uint32_t queue_id; /* from KFD */ + __u32 ring_size; /* to KFD */ + __u32 gpu_id; /* to KFD */ + __u32 queue_type; /* to KFD */ + __u32 queue_percentage; /* to KFD */ + __u32 queue_priority; /* to KFD */ + __u32 queue_id; /* from KFD */ - uint64_t eop_buffer_address; /* to KFD */ - uint64_t eop_buffer_size; /* to KFD */ - uint64_t ctx_save_restore_address; /* to KFD */ - uint64_t ctx_save_restore_size; /* to KFD */ + __u64 eop_buffer_address; /* to KFD */ + __u64 eop_buffer_size; /* to KFD */ + __u64 ctx_save_restore_address; /* to KFD */ + __u64 ctx_save_restore_size; /* to KFD */ }; struct kfd_ioctl_destroy_queue_args { - uint32_t queue_id; /* to KFD */ - uint32_t pad; + __u32 queue_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_update_queue_args { - uint64_t ring_base_address; /* to KFD */ + __u64 ring_base_address; /* to KFD */ - uint32_t queue_id; /* to KFD */ - uint32_t ring_size; /* to KFD */ - uint32_t queue_percentage; /* to KFD */ - uint32_t queue_priority; /* to KFD */ + __u32 queue_id; /* to KFD */ + __u32 ring_size; /* to KFD */ + __u32 queue_percentage; /* to KFD */ + __u32 queue_priority; /* to KFD */ }; /* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */ @@ -80,13 +80,13 @@ struct kfd_ioctl_update_queue_args { #define KFD_IOC_CACHE_POLICY_NONCOHERENT 1 struct kfd_ioctl_set_memory_policy_args { - uint64_t alternate_aperture_base; /* to KFD */ - uint64_t alternate_aperture_size; /* to KFD */ + __u64 alternate_aperture_base; /* to KFD */ + __u64 alternate_aperture_size; /* to KFD */ - uint32_t gpu_id; /* to KFD */ - uint32_t default_policy; /* to KFD */ - uint32_t alternate_policy; /* to KFD */ - uint32_t pad; + __u32 gpu_id; /* to KFD */ + __u32 default_policy; /* to KFD */ + __u32 alternate_policy; /* to KFD */ + __u32 pad; }; /* @@ -97,26 +97,26 @@ struct kfd_ioctl_set_memory_policy_args { */ struct kfd_ioctl_get_clock_counters_args { - uint64_t gpu_clock_counter; /* from KFD */ - uint64_t cpu_clock_counter; /* from KFD */ - uint64_t system_clock_counter; /* from KFD */ - uint64_t system_clock_freq; /* from KFD */ + __u64 gpu_clock_counter; /* from KFD */ + __u64 cpu_clock_counter; /* from KFD */ + __u64 system_clock_counter; /* from KFD */ + __u64 system_clock_freq; /* from KFD */ - uint32_t gpu_id; /* to KFD */ - uint32_t pad; + __u32 gpu_id; /* to KFD */ + __u32 pad; }; #define NUM_OF_SUPPORTED_GPUS 7 struct kfd_process_device_apertures { - uint64_t lds_base; /* from KFD */ - uint64_t lds_limit; /* from KFD */ - uint64_t scratch_base; /* from KFD */ - uint64_t scratch_limit; /* from KFD */ - uint64_t gpuvm_base; /* from KFD */ - uint64_t gpuvm_limit; /* from KFD */ - uint32_t gpu_id; /* from KFD */ - uint32_t pad; + __u64 lds_base; /* from KFD */ + __u64 lds_limit; /* from KFD */ + __u64 scratch_base; /* from KFD */ + __u64 scratch_limit; /* from KFD */ + __u64 gpuvm_base; /* from KFD */ + __u64 gpuvm_limit; /* from KFD */ + __u32 gpu_id; /* from KFD */ + __u32 pad; }; struct kfd_ioctl_get_process_apertures_args { @@ -124,8 +124,8 @@ struct kfd_ioctl_get_process_apertures_args { process_apertures[NUM_OF_SUPPORTED_GPUS];/* from KFD */ /* from KFD, should be in the range [1 - NUM_OF_SUPPORTED_GPUS] */ - uint32_t num_of_nodes; - uint32_t pad; + __u32 num_of_nodes; + __u32 pad; }; #define MAX_ALLOWED_NUM_POINTS 100 @@ -133,25 +133,25 @@ struct kfd_ioctl_get_process_apertures_args { #define MAX_ALLOWED_WAC_BUFF_SIZE 128 struct kfd_ioctl_dbg_register_args { - uint32_t gpu_id; /* to KFD */ - uint32_t pad; + __u32 gpu_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_dbg_unregister_args { - uint32_t gpu_id; /* to KFD */ - uint32_t pad; + __u32 gpu_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_dbg_address_watch_args { - uint64_t content_ptr; /* a pointer to the actual content */ - uint32_t gpu_id; /* to KFD */ - uint32_t buf_size_in_bytes; /*including gpu_id and buf_size */ + __u64 content_ptr; /* a pointer to the actual content */ + __u32 gpu_id; /* to KFD */ + __u32 buf_size_in_bytes; /*including gpu_id and buf_size */ }; struct kfd_ioctl_dbg_wave_control_args { - uint64_t content_ptr; /* a pointer to the actual content */ - uint32_t gpu_id; /* to KFD */ - uint32_t buf_size_in_bytes; /*including gpu_id and buf_size */ + __u64 content_ptr; /* a pointer to the actual content */ + __u32 gpu_id; /* to KFD */ + __u32 buf_size_in_bytes; /*including gpu_id and buf_size */ }; /* Matching HSA_EVENTTYPE */ @@ -172,44 +172,44 @@ struct kfd_ioctl_dbg_wave_control_args { #define KFD_SIGNAL_EVENT_LIMIT 256 struct kfd_ioctl_create_event_args { - uint64_t event_page_offset; /* from KFD */ - uint32_t event_trigger_data; /* from KFD - signal events only */ - uint32_t event_type; /* to KFD */ - uint32_t auto_reset; /* to KFD */ - uint32_t node_id; /* to KFD - only valid for certain + __u64 event_page_offset; /* from KFD */ + __u32 event_trigger_data; /* from KFD - signal events only */ + __u32 event_type; /* to KFD */ + __u32 auto_reset; /* to KFD */ + __u32 node_id; /* to KFD - only valid for certain event types */ - uint32_t event_id; /* from KFD */ - uint32_t event_slot_index; /* from KFD */ + __u32 event_id; /* from KFD */ + __u32 event_slot_index; /* from KFD */ }; struct kfd_ioctl_destroy_event_args { - uint32_t event_id; /* to KFD */ - uint32_t pad; + __u32 event_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_set_event_args { - uint32_t event_id; /* to KFD */ - uint32_t pad; + __u32 event_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_reset_event_args { - uint32_t event_id; /* to KFD */ - uint32_t pad; + __u32 event_id; /* to KFD */ + __u32 pad; }; struct kfd_memory_exception_failure { - uint32_t NotPresent; /* Page not present or supervisor privilege */ - uint32_t ReadOnly; /* Write access to a read-only page */ - uint32_t NoExecute; /* Execute access to a page marked NX */ - uint32_t pad; + __u32 NotPresent; /* Page not present or supervisor privilege */ + __u32 ReadOnly; /* Write access to a read-only page */ + __u32 NoExecute; /* Execute access to a page marked NX */ + __u32 pad; }; /* memory exception data*/ struct kfd_hsa_memory_exception_data { struct kfd_memory_exception_failure failure; - uint64_t va; - uint32_t gpu_id; - uint32_t pad; + __u64 va; + __u32 gpu_id; + __u32 pad; }; /* Event data*/ @@ -217,19 +217,19 @@ struct kfd_event_data { union { struct kfd_hsa_memory_exception_data memory_exception_data; }; /* From KFD */ - uint64_t kfd_event_data_ext; /* pointer to an extension structure + __u64 kfd_event_data_ext; /* pointer to an extension structure for future exception types */ - uint32_t event_id; /* to KFD */ - uint32_t pad; + __u32 event_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_wait_events_args { - uint64_t events_ptr; /* pointed to struct + __u64 events_ptr; /* pointed to struct kfd_event_data array, to KFD */ - uint32_t num_events; /* to KFD */ - uint32_t wait_for_all; /* to KFD */ - uint32_t timeout; /* to KFD */ - uint32_t wait_result; /* from KFD */ + __u32 num_events; /* to KFD */ + __u32 wait_for_all; /* to KFD */ + __u32 timeout; /* to KFD */ + __u32 wait_result; /* from KFD */ }; struct kfd_ioctl_set_scratch_backing_va_args { From a33b2d0359a0aae25d5ac7a26b85a5682485ebbb Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 8 Aug 2016 17:46:23 -0700 Subject: [PATCH 004/279] selftests/seccomp: Add tests for basic ptrace actions This adds tests for using only ptrace to perform syscall changes, just to validate matching behavior between seccomp events and ptrace events. Signed-off-by: Kees Cook --- tools/testing/selftests/seccomp/seccomp_bpf.c | 41 +++++++++++++++---- 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 73f5ea6778ce..e61b963f011b 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1262,6 +1262,13 @@ TEST_F(TRACE_poke, getpid_runs_normally) # error "Do not know how to find your architecture's registers and syscalls" #endif +/* When the syscall return can't be changed, stub out the tests for it. */ +#ifdef SYSCALL_NUM_RET_SHARE_REG +# define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(-1, action) +#else +# define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(val, action) +#endif + /* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux). */ @@ -1357,7 +1364,7 @@ void change_syscall(struct __test_metadata *_metadata, #ifdef SYSCALL_NUM_RET_SHARE_REG TH_LOG("Can't modify syscall return on this architecture"); #else - regs.SYSCALL_RET = 1; + regs.SYSCALL_RET = EPERM; #endif #ifdef HAVE_GETREGS @@ -1426,6 +1433,8 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, if (nr == __NR_getpid) change_syscall(_metadata, tracee, __NR_getppid); + if (nr == __NR_open) + change_syscall(_metadata, tracee, -1); } FIXTURE_DATA(TRACE_syscall) { @@ -1480,6 +1489,28 @@ FIXTURE_TEARDOWN(TRACE_syscall) free(self->prog.filter); } +TEST_F(TRACE_syscall, ptrace_syscall_redirected) +{ + /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ + teardown_trace_fixture(_metadata, self->tracer); + self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, + true); + + /* Tracer will redirect getpid to getppid. */ + EXPECT_NE(self->mypid, syscall(__NR_getpid)); +} + +TEST_F(TRACE_syscall, ptrace_syscall_dropped) +{ + /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ + teardown_trace_fixture(_metadata, self->tracer); + self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, + true); + + /* Tracer should skip the open syscall, resulting in EPERM. */ + EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_open)); +} + TEST_F(TRACE_syscall, syscall_allowed) { long ret; @@ -1520,13 +1551,8 @@ TEST_F(TRACE_syscall, syscall_dropped) ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); ASSERT_EQ(0, ret); -#ifdef SYSCALL_NUM_RET_SHARE_REG - /* gettid has been skipped */ - EXPECT_EQ(-1, syscall(__NR_gettid)); -#else /* gettid has been skipped and an altered return value stored. */ - EXPECT_EQ(1, syscall(__NR_gettid)); -#endif + EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_gettid)); EXPECT_NE(self->mytid, syscall(__NR_gettid)); } @@ -1557,6 +1583,7 @@ TEST_F(TRACE_syscall, skip_after_RET_TRACE) ASSERT_EQ(0, ret); /* Tracer will redirect getpid to getppid, and we should see EPERM. */ + errno = 0; EXPECT_EQ(-1, syscall(__NR_getpid)); EXPECT_EQ(EPERM, errno); } From 967d7ba8415139107033770a40c7ec7dd17fbcb1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 2 Dec 2015 11:39:36 -0800 Subject: [PATCH 005/279] selftests/seccomp: Add simple seccomp overhead benchmark This attempts to produce a comparison between native getpid() and a RET_ALLOW-filtered getpid(), to measure the overhead cost of using seccomp(). Signed-off-by: Kees Cook --- tools/testing/selftests/seccomp/Makefile | 18 +++- .../selftests/seccomp/seccomp_benchmark.c | 99 +++++++++++++++++++ 2 files changed, 112 insertions(+), 5 deletions(-) create mode 100644 tools/testing/selftests/seccomp/seccomp_benchmark.c diff --git a/tools/testing/selftests/seccomp/Makefile b/tools/testing/selftests/seccomp/Makefile index aeb0c805f3ca..553d870b4ca9 100644 --- a/tools/testing/selftests/seccomp/Makefile +++ b/tools/testing/selftests/seccomp/Makefile @@ -1,8 +1,16 @@ -TEST_GEN_PROGS := seccomp_bpf -CFLAGS += -Wl,-no-as-needed -Wall -LDFLAGS += -lpthread +all: include ../lib.mk -$(TEST_GEN_PROGS): seccomp_bpf.c ../kselftest_harness.h - $(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ +.PHONY: all clean + +BINARIES := seccomp_bpf seccomp_benchmark +CFLAGS += -Wl,-no-as-needed -Wall + +seccomp_bpf: seccomp_bpf.c ../kselftest_harness.h + $(CC) $(CFLAGS) $(LDFLAGS) -lpthread $< -o $@ + +TEST_PROGS += $(BINARIES) +EXTRA_CLEAN := $(BINARIES) + +all: $(BINARIES) diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c new file mode 100644 index 000000000000..5838c8697ec3 --- /dev/null +++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c @@ -0,0 +1,99 @@ +/* + * Strictly speaking, this is not a test. But it can report during test + * runs so relative performace can be measured. + */ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) + +unsigned long long timing(clockid_t clk_id, unsigned long long samples) +{ + pid_t pid, ret; + unsigned long long i; + struct timespec start, finish; + + pid = getpid(); + assert(clock_gettime(clk_id, &start) == 0); + for (i = 0; i < samples; i++) { + ret = syscall(__NR_getpid); + assert(pid == ret); + } + assert(clock_gettime(clk_id, &finish) == 0); + + i = finish.tv_sec - start.tv_sec; + i *= 1000000000; + i += finish.tv_nsec - start.tv_nsec; + + printf("%lu.%09lu - %lu.%09lu = %llu\n", + finish.tv_sec, finish.tv_nsec, + start.tv_sec, start.tv_nsec, + i); + + return i; +} + +unsigned long long calibrate(void) +{ + unsigned long long i; + + printf("Calibrating reasonable sample size...\n"); + + for (i = 5; ; i++) { + unsigned long long samples = 1 << i; + + /* Find something that takes more than 5 seconds to run. */ + if (timing(CLOCK_REALTIME, samples) / 1000000000ULL > 5) + return samples; + } +} + +int main(int argc, char *argv[]) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + unsigned long long samples; + unsigned long long native, filtered; + + if (argc > 1) + samples = strtoull(argv[1], NULL, 0); + else + samples = calibrate(); + + printf("Benchmarking %llu samples...\n", samples); + + native = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; + printf("getpid native: %llu ns\n", native); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + assert(ret == 0); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + assert(ret == 0); + + filtered = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; + printf("getpid RET_ALLOW: %llu ns\n", filtered); + + printf("Estimated seccomp overhead per syscall: %llu ns\n", + filtered - native); + + if (filtered == native) + printf("Trying running again with more samples.\n"); + + return 0; +} From f3f6e30669c048f47d51ea59df9946a91f551c4c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 2 Aug 2017 14:08:39 -0700 Subject: [PATCH 006/279] selftests/seccomp: Refactor RET_ERRNO tests This refactors the errno tests (since they all use the same pattern for their filter) and adds a RET_DATA field ordering test. Signed-off-by: Kees Cook Reviewed-by: Tyler Hicks --- tools/testing/selftests/seccomp/seccomp_bpf.c | 95 +++++++++++-------- 1 file changed, 58 insertions(+), 37 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index e61b963f011b..2fb49d99588d 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -136,7 +136,7 @@ TEST(no_new_privs_support) } } -/* Tests kernel support by checking for a copy_from_user() fault on * NULL. */ +/* Tests kernel support by checking for a copy_from_user() fault on NULL. */ TEST(mode_filter_support) { long ret; @@ -541,26 +541,30 @@ TEST(arg_out_of_range) EXPECT_EQ(EINVAL, errno); } +#define ERRNO_FILTER(name, errno) \ + struct sock_filter _read_filter_##name[] = { \ + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, \ + offsetof(struct seccomp_data, nr)), \ + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), \ + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | errno), \ + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), \ + }; \ + struct sock_fprog prog_##name = { \ + .len = (unsigned short)ARRAY_SIZE(_read_filter_##name), \ + .filter = _read_filter_##name, \ + } + +/* Make sure basic errno values are correctly passed through a filter. */ TEST(ERRNO_valid) { - struct sock_filter filter[] = { - BPF_STMT(BPF_LD|BPF_W|BPF_ABS, - offsetof(struct seccomp_data, nr)), - BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), - BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | E2BIG), - BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), - }; - struct sock_fprog prog = { - .len = (unsigned short)ARRAY_SIZE(filter), - .filter = filter, - }; + ERRNO_FILTER(valid, E2BIG); long ret; pid_t parent = getppid(); ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); ASSERT_EQ(0, ret); - ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid); ASSERT_EQ(0, ret); EXPECT_EQ(parent, syscall(__NR_getppid)); @@ -568,26 +572,17 @@ TEST(ERRNO_valid) EXPECT_EQ(E2BIG, errno); } +/* Make sure an errno of zero is correctly handled by the arch code. */ TEST(ERRNO_zero) { - struct sock_filter filter[] = { - BPF_STMT(BPF_LD|BPF_W|BPF_ABS, - offsetof(struct seccomp_data, nr)), - BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), - BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 0), - BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), - }; - struct sock_fprog prog = { - .len = (unsigned short)ARRAY_SIZE(filter), - .filter = filter, - }; + ERRNO_FILTER(zero, 0); long ret; pid_t parent = getppid(); ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); ASSERT_EQ(0, ret); - ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero); ASSERT_EQ(0, ret); EXPECT_EQ(parent, syscall(__NR_getppid)); @@ -595,26 +590,21 @@ TEST(ERRNO_zero) EXPECT_EQ(0, read(0, NULL, 0)); } +/* + * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller. + * This tests that the errno value gets capped correctly, fixed by + * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO"). + */ TEST(ERRNO_capped) { - struct sock_filter filter[] = { - BPF_STMT(BPF_LD|BPF_W|BPF_ABS, - offsetof(struct seccomp_data, nr)), - BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), - BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 4096), - BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), - }; - struct sock_fprog prog = { - .len = (unsigned short)ARRAY_SIZE(filter), - .filter = filter, - }; + ERRNO_FILTER(capped, 4096); long ret; pid_t parent = getppid(); ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); ASSERT_EQ(0, ret); - ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped); ASSERT_EQ(0, ret); EXPECT_EQ(parent, syscall(__NR_getppid)); @@ -622,6 +612,37 @@ TEST(ERRNO_capped) EXPECT_EQ(4095, errno); } +/* + * Filters are processed in reverse order: last applied is executed first. + * Since only the SECCOMP_RET_ACTION mask is tested for return values, the + * SECCOMP_RET_DATA mask results will follow the most recently applied + * matching filter return (and not the lowest or highest value). + */ +TEST(ERRNO_order) +{ + ERRNO_FILTER(first, 11); + ERRNO_FILTER(second, 13); + ERRNO_FILTER(third, 12); + long ret; + pid_t parent = getppid(); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third); + ASSERT_EQ(0, ret); + + EXPECT_EQ(parent, syscall(__NR_getppid)); + EXPECT_EQ(-1, read(0, NULL, 0)); + EXPECT_EQ(12, errno); +} + FIXTURE_DATA(TRAP) { struct sock_fprog prog; }; From deb4de8b31bc5bf21efb6ac31150a01a631cd647 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 2 Aug 2017 15:00:40 -0700 Subject: [PATCH 007/279] seccomp: Provide matching filter for introspection Both the upcoming logging improvements and changes to RET_KILL will need to know which filter a given seccomp return value originated from. In order to delay logic processing of result until after the seccomp loop, this adds a single pointer assignment on matches. This will allow both log and RET_KILL logic to work off the filter rather than doing more expensive tests inside the time-critical run_filters loop. Running tight cycles of getpid() with filters attached shows no measurable difference in speed. Suggested-by: Tyler Hicks Signed-off-by: Kees Cook Reviewed-by: Tyler Hicks --- kernel/seccomp.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 98b59b5db90b..1f3347fc2605 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -171,10 +171,14 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen) /** * seccomp_run_filters - evaluates all seccomp filters against @sd * @sd: optional seccomp data to be passed to filters + * @match: stores struct seccomp_filter that resulted in the return value, + * unless filter returned SECCOMP_RET_ALLOW, in which case it will + * be unchanged. * * Returns valid seccomp BPF response codes. */ -static u32 seccomp_run_filters(const struct seccomp_data *sd) +static u32 seccomp_run_filters(const struct seccomp_data *sd, + struct seccomp_filter **match) { struct seccomp_data sd_local; u32 ret = SECCOMP_RET_ALLOW; @@ -198,8 +202,10 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd) for (; f; f = f->prev) { u32 cur_ret = BPF_PROG_RUN(f->prog, sd); - if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION)) + if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION)) { ret = cur_ret; + *match = f; + } } return ret; } @@ -566,6 +572,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, const bool recheck_after_trace) { u32 filter_ret, action; + struct seccomp_filter *match = NULL; int data; /* @@ -574,7 +581,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, */ rmb(); - filter_ret = seccomp_run_filters(sd); + filter_ret = seccomp_run_filters(sd, &match); data = filter_ret & SECCOMP_RET_DATA; action = filter_ret & SECCOMP_RET_ACTION; @@ -638,6 +645,11 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, return 0; case SECCOMP_RET_ALLOW: + /* + * Note that the "match" filter will always be NULL for + * this action since SECCOMP_RET_ALLOW is the starting + * state in seccomp_run_filters(). + */ return 0; case SECCOMP_RET_KILL: From 8e5f1ad116df6b0de65eac458d5e7c318d1c05af Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Fri, 11 Aug 2017 04:33:52 +0000 Subject: [PATCH 008/279] seccomp: Sysctl to display available actions This patch creates a read-only sysctl containing an ordered list of seccomp actions that the kernel supports. The ordering, from left to right, is the lowest action value (kill) to the highest action value (allow). Currently, a read of the sysctl file would return "kill trap errno trace allow". The contents of this sysctl file can be useful for userspace code as well as the system administrator. The path to the sysctl is: /proc/sys/kernel/seccomp/actions_avail libseccomp and other userspace code can easily determine which actions the current kernel supports. The set of actions supported by the current kernel may be different than the set of action macros found in kernel headers that were installed where the userspace code was built. In addition, this sysctl will allow system administrators to know which actions are supported by the kernel and make it easier to configure exactly what seccomp logs through the audit subsystem. Support for this level of logging configuration will come in a future patch. Signed-off-by: Tyler Hicks Signed-off-by: Kees Cook --- Documentation/sysctl/kernel.txt | 1 + .../userspace-api/seccomp_filter.rst | 16 ++++++ kernel/seccomp.c | 51 +++++++++++++++++++ 3 files changed, 68 insertions(+) diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index bac23c198360..995c42cf86ba 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -74,6 +74,7 @@ show up in /proc/sys/kernel: - reboot-cmd [ SPARC only ] - rtsig-max - rtsig-nr +- seccomp/ ==> Documentation/userspace-api/seccomp_filter.rst - sem - sem_next_id [ sysv ipc ] - sg-big-buff [ generic SCSI device (sg) ] diff --git a/Documentation/userspace-api/seccomp_filter.rst b/Documentation/userspace-api/seccomp_filter.rst index f71eb5ef1f2d..35fc7cbf1d95 100644 --- a/Documentation/userspace-api/seccomp_filter.rst +++ b/Documentation/userspace-api/seccomp_filter.rst @@ -169,7 +169,23 @@ The ``samples/seccomp/`` directory contains both an x86-specific example and a more generic example of a higher level macro interface for BPF program generation. +Sysctls +======= +Seccomp's sysctl files can be found in the ``/proc/sys/kernel/seccomp/`` +directory. Here's a description of each file in that directory: + +``actions_avail``: + A read-only ordered list of seccomp return values (refer to the + ``SECCOMP_RET_*`` macros above) in string form. The ordering, from + left-to-right, is the least permissive return value to the most + permissive return value. + + The list represents the set of seccomp return values supported + by the kernel. A userspace program may use this list to + determine if the actions found in the ``seccomp.h``, when the + program was built, differs from the set of actions actually + supported in the current running kernel. Adding architecture support =========================== diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 1f3347fc2605..5f19f41e4e50 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -17,11 +17,13 @@ #include #include #include +#include #include #include #include #include #include +#include #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER #include @@ -934,3 +936,52 @@ out: return ret; } #endif + +#ifdef CONFIG_SYSCTL + +/* Human readable action names for friendly sysctl interaction */ +#define SECCOMP_RET_KILL_NAME "kill" +#define SECCOMP_RET_TRAP_NAME "trap" +#define SECCOMP_RET_ERRNO_NAME "errno" +#define SECCOMP_RET_TRACE_NAME "trace" +#define SECCOMP_RET_ALLOW_NAME "allow" + +static const char seccomp_actions_avail[] = SECCOMP_RET_KILL_NAME " " + SECCOMP_RET_TRAP_NAME " " + SECCOMP_RET_ERRNO_NAME " " + SECCOMP_RET_TRACE_NAME " " + SECCOMP_RET_ALLOW_NAME; + +static struct ctl_path seccomp_sysctl_path[] = { + { .procname = "kernel", }, + { .procname = "seccomp", }, + { } +}; + +static struct ctl_table seccomp_sysctl_table[] = { + { + .procname = "actions_avail", + .data = (void *) &seccomp_actions_avail, + .maxlen = sizeof(seccomp_actions_avail), + .mode = 0444, + .proc_handler = proc_dostring, + }, + { } +}; + +static int __init seccomp_sysctl_init(void) +{ + struct ctl_table_header *hdr; + + hdr = register_sysctl_paths(seccomp_sysctl_path, seccomp_sysctl_table); + if (!hdr) + pr_warn("seccomp: sysctl registration failed\n"); + else + kmemleak_not_leak(hdr); + + return 0; +} + +device_initcall(seccomp_sysctl_init) + +#endif /* CONFIG_SYSCTL */ From d612b1fd8010d0d67b5287fe146b8b55bcbb8655 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Fri, 11 Aug 2017 04:33:53 +0000 Subject: [PATCH 009/279] seccomp: Operation for checking if an action is available Userspace code that needs to check if the kernel supports a given action may not be able to use the /proc/sys/kernel/seccomp/actions_avail sysctl. The process may be running in a sandbox and, therefore, sufficient filesystem access may not be available. This patch adds an operation to the seccomp(2) syscall that allows userspace code to ask the kernel if a given action is available. If the action is supported by the kernel, 0 is returned. If the action is not supported by the kernel, -1 is returned with errno set to -EOPNOTSUPP. If this check is attempted on a kernel that doesn't support this new operation, -1 is returned with errno set to -EINVAL meaning that userspace code will have the ability to differentiate between the two error cases. Signed-off-by: Tyler Hicks Suggested-by: Andy Lutomirski Signed-off-by: Kees Cook --- include/uapi/linux/seccomp.h | 5 +-- kernel/seccomp.c | 26 ++++++++++++++ tools/testing/selftests/seccomp/seccomp_bpf.c | 36 +++++++++++++++++++ 3 files changed, 65 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index 0f238a43ff1e..aaad61cc46bc 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -11,8 +11,9 @@ #define SECCOMP_MODE_FILTER 2 /* uses user-supplied filter. */ /* Valid operations for seccomp syscall. */ -#define SECCOMP_SET_MODE_STRICT 0 -#define SECCOMP_SET_MODE_FILTER 1 +#define SECCOMP_SET_MODE_STRICT 0 +#define SECCOMP_SET_MODE_FILTER 1 +#define SECCOMP_GET_ACTION_AVAIL 2 /* Valid flags for SECCOMP_SET_MODE_FILTER */ #define SECCOMP_FILTER_FLAG_TSYNC 1 diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 5f19f41e4e50..7a6089f66fed 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -808,6 +808,27 @@ static inline long seccomp_set_mode_filter(unsigned int flags, } #endif +static long seccomp_get_action_avail(const char __user *uaction) +{ + u32 action; + + if (copy_from_user(&action, uaction, sizeof(action))) + return -EFAULT; + + switch (action) { + case SECCOMP_RET_KILL: + case SECCOMP_RET_TRAP: + case SECCOMP_RET_ERRNO: + case SECCOMP_RET_TRACE: + case SECCOMP_RET_ALLOW: + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + /* Common entry point for both prctl and syscall. */ static long do_seccomp(unsigned int op, unsigned int flags, const char __user *uargs) @@ -819,6 +840,11 @@ static long do_seccomp(unsigned int op, unsigned int flags, return seccomp_set_mode_strict(); case SECCOMP_SET_MODE_FILTER: return seccomp_set_mode_filter(flags, uargs); + case SECCOMP_GET_ACTION_AVAIL: + if (flags != 0) + return -EINVAL; + + return seccomp_get_action_avail(uargs); default: return -EINVAL; } diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 2fb49d99588d..1f2888f6678b 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1731,6 +1731,10 @@ TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS) #define SECCOMP_SET_MODE_FILTER 1 #endif +#ifndef SECCOMP_GET_ACTION_AVAIL +#define SECCOMP_GET_ACTION_AVAIL 2 +#endif + #ifndef SECCOMP_FILTER_FLAG_TSYNC #define SECCOMP_FILTER_FLAG_TSYNC 1 #endif @@ -2469,6 +2473,38 @@ TEST(syscall_restart) _metadata->passed = 0; } +TEST(get_action_avail) +{ + __u32 actions[] = { SECCOMP_RET_KILL, SECCOMP_RET_TRAP, + SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE, + SECCOMP_RET_ALLOW }; + __u32 unknown_action = 0x10000000U; + int i; + long ret; + + ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]); + ASSERT_NE(ENOSYS, errno) { + TH_LOG("Kernel does not support seccomp syscall!"); + } + ASSERT_NE(EINVAL, errno) { + TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!"); + } + EXPECT_EQ(ret, 0); + + for (i = 0; i < ARRAY_SIZE(actions); i++) { + ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]); + EXPECT_EQ(ret, 0) { + TH_LOG("Expected action (0x%X) not available!", + actions[i]); + } + } + + /* Check that an unknown action is handled properly (EOPNOTSUPP) */ + ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action); + EXPECT_EQ(ret, -1); + EXPECT_EQ(errno, EOPNOTSUPP); +} + /* * TODO: * - add microbenchmarks From 0ddec0fc8900201c0897b87b762b7c420436662f Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Fri, 11 Aug 2017 04:33:54 +0000 Subject: [PATCH 010/279] seccomp: Sysctl to configure actions that are allowed to be logged Adminstrators can write to this sysctl to set the seccomp actions that are allowed to be logged. Any actions not found in this sysctl will not be logged. For example, all SECCOMP_RET_KILL, SECCOMP_RET_TRAP, and SECCOMP_RET_ERRNO actions would be loggable if "kill trap errno" were written to the sysctl. SECCOMP_RET_TRACE actions would not be logged since its string representation ("trace") wasn't present in the sysctl value. The path to the sysctl is: /proc/sys/kernel/seccomp/actions_logged The actions_avail sysctl can be read to discover the valid action names that can be written to the actions_logged sysctl with the exception of "allow". SECCOMP_RET_ALLOW actions cannot be configured for logging. The default setting for the sysctl is to allow all actions to be logged except SECCOMP_RET_ALLOW. While only SECCOMP_RET_KILL actions are currently logged, an upcoming patch will allow applications to request additional actions to be logged. There's one important exception to this sysctl. If a task is specifically being audited, meaning that an audit context has been allocated for the task, seccomp will log all actions other than SECCOMP_RET_ALLOW despite the value of actions_logged. This exception preserves the existing auditing behavior of tasks with an allocated audit context. With this patch, the logic for deciding if an action will be logged is: if action == RET_ALLOW: do not log else if action == RET_KILL && RET_KILL in actions_logged: log else if audit_enabled && task-is-being-audited: log else: do not log Signed-off-by: Tyler Hicks Signed-off-by: Kees Cook --- .../userspace-api/seccomp_filter.rst | 18 ++ include/linux/audit.h | 6 +- kernel/seccomp.c | 171 +++++++++++++++++- 3 files changed, 187 insertions(+), 8 deletions(-) diff --git a/Documentation/userspace-api/seccomp_filter.rst b/Documentation/userspace-api/seccomp_filter.rst index 35fc7cbf1d95..2d1d8ab04ac5 100644 --- a/Documentation/userspace-api/seccomp_filter.rst +++ b/Documentation/userspace-api/seccomp_filter.rst @@ -187,6 +187,24 @@ directory. Here's a description of each file in that directory: program was built, differs from the set of actions actually supported in the current running kernel. +``actions_logged``: + A read-write ordered list of seccomp return values (refer to the + ``SECCOMP_RET_*`` macros above) that are allowed to be logged. Writes + to the file do not need to be in ordered form but reads from the file + will be ordered in the same way as the actions_avail sysctl. + + It is important to note that the value of ``actions_logged`` does not + prevent certain actions from being logged when the audit subsystem is + configured to audit a task. If the action is not found in + ``actions_logged`` list, the final decision on whether to audit the + action for that task is ultimately left up to the audit subsystem to + decide for all seccomp return values other than ``SECCOMP_RET_ALLOW``. + + The ``allow`` string is not accepted in the ``actions_logged`` sysctl + as it is not possible to log ``SECCOMP_RET_ALLOW`` actions. Attempting + to write ``allow`` to the sysctl will result in an EINVAL being + returned. + Adding architecture support =========================== diff --git a/include/linux/audit.h b/include/linux/audit.h index 2150bdccfbab..8c30f06d639d 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -314,11 +314,7 @@ void audit_core_dumps(long signr); static inline void audit_seccomp(unsigned long syscall, long signr, int code) { - if (!audit_enabled) - return; - - /* Force a record to be reported if a signal was delivered. */ - if (signr || unlikely(!audit_dummy_context())) + if (audit_enabled && unlikely(!audit_dummy_context())) __audit_seccomp(syscall, signr, code); } diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 7a6089f66fed..54357e361aea 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -522,6 +522,45 @@ static void seccomp_send_sigsys(int syscall, int reason) } #endif /* CONFIG_SECCOMP_FILTER */ +/* For use with seccomp_actions_logged */ +#define SECCOMP_LOG_KILL (1 << 0) +#define SECCOMP_LOG_TRAP (1 << 2) +#define SECCOMP_LOG_ERRNO (1 << 3) +#define SECCOMP_LOG_TRACE (1 << 4) +#define SECCOMP_LOG_ALLOW (1 << 5) + +static u32 seccomp_actions_logged = SECCOMP_LOG_KILL | SECCOMP_LOG_TRAP | + SECCOMP_LOG_ERRNO | SECCOMP_LOG_TRACE; + +static inline void seccomp_log(unsigned long syscall, long signr, u32 action) +{ + bool log = false; + + switch (action) { + case SECCOMP_RET_ALLOW: + case SECCOMP_RET_TRAP: + case SECCOMP_RET_ERRNO: + case SECCOMP_RET_TRACE: + break; + case SECCOMP_RET_KILL: + default: + log = seccomp_actions_logged & SECCOMP_LOG_KILL; + } + + /* + * Force an audit message to be emitted when the action is RET_KILL and + * the action is allowed to be logged by the admin. + */ + if (log) + return __audit_seccomp(syscall, signr, action); + + /* + * Let the audit subsystem decide if the action should be audited based + * on whether the current task itself is being audited. + */ + return audit_seccomp(syscall, signr, action); +} + /* * Secure computing mode 1 allows only read/write/exit/sigreturn. * To be fully secure this must be combined with rlimit @@ -547,7 +586,7 @@ static void __secure_computing_strict(int this_syscall) #ifdef SECCOMP_DEBUG dump_stack(); #endif - audit_seccomp(this_syscall, SIGKILL, SECCOMP_RET_KILL); + seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL); do_exit(SIGKILL); } @@ -656,7 +695,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, case SECCOMP_RET_KILL: default: - audit_seccomp(this_syscall, SIGSYS, action); + seccomp_log(this_syscall, SIGSYS, action); /* Dump core only if this is the last remaining thread. */ if (get_nr_threads(current) == 1) { siginfo_t info; @@ -673,7 +712,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, unreachable(); skip: - audit_seccomp(this_syscall, 0, action); + seccomp_log(this_syscall, 0, action); return -1; } #else @@ -978,6 +1017,127 @@ static const char seccomp_actions_avail[] = SECCOMP_RET_KILL_NAME " " SECCOMP_RET_TRACE_NAME " " SECCOMP_RET_ALLOW_NAME; +struct seccomp_log_name { + u32 log; + const char *name; +}; + +static const struct seccomp_log_name seccomp_log_names[] = { + { SECCOMP_LOG_KILL, SECCOMP_RET_KILL_NAME }, + { SECCOMP_LOG_TRAP, SECCOMP_RET_TRAP_NAME }, + { SECCOMP_LOG_ERRNO, SECCOMP_RET_ERRNO_NAME }, + { SECCOMP_LOG_TRACE, SECCOMP_RET_TRACE_NAME }, + { SECCOMP_LOG_ALLOW, SECCOMP_RET_ALLOW_NAME }, + { } +}; + +static bool seccomp_names_from_actions_logged(char *names, size_t size, + u32 actions_logged) +{ + const struct seccomp_log_name *cur; + bool append_space = false; + + for (cur = seccomp_log_names; cur->name && size; cur++) { + ssize_t ret; + + if (!(actions_logged & cur->log)) + continue; + + if (append_space) { + ret = strscpy(names, " ", size); + if (ret < 0) + return false; + + names += ret; + size -= ret; + } else + append_space = true; + + ret = strscpy(names, cur->name, size); + if (ret < 0) + return false; + + names += ret; + size -= ret; + } + + return true; +} + +static bool seccomp_action_logged_from_name(u32 *action_logged, + const char *name) +{ + const struct seccomp_log_name *cur; + + for (cur = seccomp_log_names; cur->name; cur++) { + if (!strcmp(cur->name, name)) { + *action_logged = cur->log; + return true; + } + } + + return false; +} + +static bool seccomp_actions_logged_from_names(u32 *actions_logged, char *names) +{ + char *name; + + *actions_logged = 0; + while ((name = strsep(&names, " ")) && *name) { + u32 action_logged = 0; + + if (!seccomp_action_logged_from_name(&action_logged, name)) + return false; + + *actions_logged |= action_logged; + } + + return true; +} + +static int seccomp_actions_logged_handler(struct ctl_table *ro_table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + char names[sizeof(seccomp_actions_avail)]; + struct ctl_table table; + int ret; + + if (write && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + memset(names, 0, sizeof(names)); + + if (!write) { + if (!seccomp_names_from_actions_logged(names, sizeof(names), + seccomp_actions_logged)) + return -EINVAL; + } + + table = *ro_table; + table.data = names; + table.maxlen = sizeof(names); + ret = proc_dostring(&table, write, buffer, lenp, ppos); + if (ret) + return ret; + + if (write) { + u32 actions_logged; + + if (!seccomp_actions_logged_from_names(&actions_logged, + table.data)) + return -EINVAL; + + if (actions_logged & SECCOMP_LOG_ALLOW) + return -EINVAL; + + seccomp_actions_logged = actions_logged; + } + + return 0; +} + static struct ctl_path seccomp_sysctl_path[] = { { .procname = "kernel", }, { .procname = "seccomp", }, @@ -992,6 +1152,11 @@ static struct ctl_table seccomp_sysctl_table[] = { .mode = 0444, .proc_handler = proc_dostring, }, + { + .procname = "actions_logged", + .mode = 0644, + .proc_handler = seccomp_actions_logged_handler, + }, { } }; From 2b7ea5b5b5799f2878ed454bb48032bed6d101d3 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Fri, 11 Aug 2017 04:33:55 +0000 Subject: [PATCH 011/279] seccomp: Selftest for detection of filter flag support Userspace needs to be able to reliably detect the support of a filter flag. A good way of doing that is by attempting to enter filter mode, with the flag bit(s) in question set, and a NULL pointer for the args parameter of seccomp(2). EFAULT indicates that the flag is valid and EINVAL indicates that the flag is invalid. This patch adds a selftest that can be used to test this method of detection in userspace. Signed-off-by: Tyler Hicks Signed-off-by: Kees Cook --- tools/testing/selftests/seccomp/seccomp_bpf.c | 60 +++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 1f2888f6678b..abf708e09892 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1835,6 +1835,66 @@ TEST(seccomp_syscall_mode_lock) } } +/* + * Test detection of known and unknown filter flags. Userspace needs to be able + * to check if a filter flag is supported by the current kernel and a good way + * of doing that is by attempting to enter filter mode, with the flag bit in + * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates + * that the flag is valid and EINVAL indicates that the flag is invalid. + */ +TEST(detect_seccomp_filter_flags) +{ + unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC }; + unsigned int flag, all_flags; + int i; + long ret; + + /* Test detection of known-good filter flags */ + for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) { + flag = flags[i]; + ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); + ASSERT_NE(ENOSYS, errno) { + TH_LOG("Kernel does not support seccomp syscall!"); + } + EXPECT_EQ(-1, ret); + EXPECT_EQ(EFAULT, errno) { + TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!", + flag); + } + + all_flags |= flag; + } + + /* Test detection of all known-good filter flags */ + ret = seccomp(SECCOMP_SET_MODE_FILTER, all_flags, NULL); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EFAULT, errno) { + TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!", + all_flags); + } + + /* Test detection of an unknown filter flag */ + flag = -1; + ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno) { + TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!", + flag); + } + + /* + * Test detection of an unknown filter flag that may simply need to be + * added to this test + */ + flag = flags[ARRAY_SIZE(flags) - 1] << 1; + ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno) { + TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?", + flag); + } +} + TEST(TSYNC_first) { struct sock_filter filter[] = { From e66a39977985b1e69e17c4042cb290768eca9b02 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Fri, 11 Aug 2017 04:33:56 +0000 Subject: [PATCH 012/279] seccomp: Filter flag to log all actions except SECCOMP_RET_ALLOW Add a new filter flag, SECCOMP_FILTER_FLAG_LOG, that enables logging for all actions except for SECCOMP_RET_ALLOW for the given filter. SECCOMP_RET_KILL actions are always logged, when "kill" is in the actions_logged sysctl, and SECCOMP_RET_ALLOW actions are never logged, regardless of this flag. This flag can be used to create noisy filters that result in all non-allowed actions to be logged. A process may have one noisy filter, which is loaded with this flag, as well as a quiet filter that's not loaded with this flag. This allows for the actions in a set of filters to be selectively conveyed to the admin. Since a system could have a large number of allocated seccomp_filter structs, struct packing was taken in consideration. On 64 bit x86, the new log member takes up one byte of an existing four byte hole in the struct. On 32 bit x86, the new log member creates a new four byte hole (unavoidable) and consumes one of those bytes. Unfortunately, the tests added for SECCOMP_FILTER_FLAG_LOG are not capable of inspecting the audit log to verify that the actions taken in the filter were logged. With this patch, the logic for deciding if an action will be logged is: if action == RET_ALLOW: do not log else if action == RET_KILL && RET_KILL in actions_logged: log else if filter-requests-logging && action in actions_logged: log else if audit_enabled && process-is-being-audited: log else: do not log Signed-off-by: Tyler Hicks Signed-off-by: Kees Cook --- include/linux/seccomp.h | 3 +- include/uapi/linux/seccomp.h | 1 + kernel/seccomp.c | 26 +++++-- tools/testing/selftests/seccomp/seccomp_bpf.c | 69 ++++++++++++++++++- 4 files changed, 91 insertions(+), 8 deletions(-) diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index ecc296c137cd..c8bef436b61d 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -3,7 +3,8 @@ #include -#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC) +#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC | \ + SECCOMP_FILTER_FLAG_LOG) #ifdef CONFIG_SECCOMP diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index aaad61cc46bc..19a611d0712e 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -17,6 +17,7 @@ /* Valid flags for SECCOMP_SET_MODE_FILTER */ #define SECCOMP_FILTER_FLAG_TSYNC 1 +#define SECCOMP_FILTER_FLAG_LOG 2 /* * All BPF programs must return a 32-bit value. diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 54357e361aea..ed9fde418fc4 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -44,6 +44,7 @@ * get/put helpers should be used when accessing an instance * outside of a lifetime-guarded section. In general, this * is only needed for handling filters shared across tasks. + * @log: true if all actions except for SECCOMP_RET_ALLOW should be logged * @prev: points to a previously installed, or inherited, filter * @prog: the BPF program to evaluate * @@ -59,6 +60,7 @@ */ struct seccomp_filter { refcount_t usage; + bool log; struct seccomp_filter *prev; struct bpf_prog *prog; }; @@ -452,6 +454,10 @@ static long seccomp_attach_filter(unsigned int flags, return ret; } + /* Set log flag, if present. */ + if (flags & SECCOMP_FILTER_FLAG_LOG) + filter->log = true; + /* * If there is an existing filter, make it the prev and don't drop its * task reference. @@ -532,15 +538,22 @@ static void seccomp_send_sigsys(int syscall, int reason) static u32 seccomp_actions_logged = SECCOMP_LOG_KILL | SECCOMP_LOG_TRAP | SECCOMP_LOG_ERRNO | SECCOMP_LOG_TRACE; -static inline void seccomp_log(unsigned long syscall, long signr, u32 action) +static inline void seccomp_log(unsigned long syscall, long signr, u32 action, + bool requested) { bool log = false; switch (action) { case SECCOMP_RET_ALLOW: + break; case SECCOMP_RET_TRAP: + log = requested && seccomp_actions_logged & SECCOMP_LOG_TRAP; + break; case SECCOMP_RET_ERRNO: + log = requested && seccomp_actions_logged & SECCOMP_LOG_ERRNO; + break; case SECCOMP_RET_TRACE: + log = requested && seccomp_actions_logged & SECCOMP_LOG_TRACE; break; case SECCOMP_RET_KILL: default: @@ -548,8 +561,9 @@ static inline void seccomp_log(unsigned long syscall, long signr, u32 action) } /* - * Force an audit message to be emitted when the action is RET_KILL and - * the action is allowed to be logged by the admin. + * Force an audit message to be emitted when the action is RET_KILL or + * the FILTER_FLAG_LOG bit was set and the action is allowed to be + * logged by the admin. */ if (log) return __audit_seccomp(syscall, signr, action); @@ -586,7 +600,7 @@ static void __secure_computing_strict(int this_syscall) #ifdef SECCOMP_DEBUG dump_stack(); #endif - seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL); + seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL, true); do_exit(SIGKILL); } @@ -695,7 +709,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, case SECCOMP_RET_KILL: default: - seccomp_log(this_syscall, SIGSYS, action); + seccomp_log(this_syscall, SIGSYS, action, true); /* Dump core only if this is the last remaining thread. */ if (get_nr_threads(current) == 1) { siginfo_t info; @@ -712,7 +726,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, unreachable(); skip: - seccomp_log(this_syscall, 0, action); + seccomp_log(this_syscall, 0, action, match ? match->log : false); return -1; } #else diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index abf708e09892..1c8c22ce7740 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1739,6 +1739,10 @@ TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS) #define SECCOMP_FILTER_FLAG_TSYNC 1 #endif +#ifndef SECCOMP_FILTER_FLAG_LOG +#define SECCOMP_FILTER_FLAG_LOG 2 +#endif + #ifndef seccomp int seccomp(unsigned int op, unsigned int flags, void *args) { @@ -1844,7 +1848,8 @@ TEST(seccomp_syscall_mode_lock) */ TEST(detect_seccomp_filter_flags) { - unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC }; + unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC, + SECCOMP_FILTER_FLAG_LOG }; unsigned int flag, all_flags; int i; long ret; @@ -2533,6 +2538,67 @@ TEST(syscall_restart) _metadata->passed = 0; } +TEST_SIGNAL(filter_flag_log, SIGSYS) +{ + struct sock_filter allow_filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_filter kill_filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog allow_prog = { + .len = (unsigned short)ARRAY_SIZE(allow_filter), + .filter = allow_filter, + }; + struct sock_fprog kill_prog = { + .len = (unsigned short)ARRAY_SIZE(kill_filter), + .filter = kill_filter, + }; + long ret; + pid_t parent = getppid(); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + /* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */ + ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG, + &allow_prog); + ASSERT_NE(ENOSYS, errno) { + TH_LOG("Kernel does not support seccomp syscall!"); + } + EXPECT_NE(0, ret) { + TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!"); + } + EXPECT_EQ(EINVAL, errno) { + TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!"); + } + + /* Verify that a simple, permissive filter can be added with no flags */ + ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog); + EXPECT_EQ(0, ret); + + /* See if the same filter can be added with the FILTER_FLAG_LOG flag */ + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG, + &allow_prog); + ASSERT_NE(EINVAL, errno) { + TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!"); + } + EXPECT_EQ(0, ret); + + /* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */ + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG, + &kill_prog); + EXPECT_EQ(0, ret); + + EXPECT_EQ(parent, syscall(__NR_getppid)); + /* getpid() should never return. */ + EXPECT_EQ(0, syscall(__NR_getpid)); +} + TEST(get_action_avail) { __u32 actions[] = { SECCOMP_RET_KILL, SECCOMP_RET_TRAP, @@ -2573,6 +2639,7 @@ TEST(get_action_avail) * - endianness checking when appropriate * - 64-bit arg prodding * - arch value testing (x86 modes especially) + * - verify that FILTER_FLAG_LOG filters generate log messages * - ... */ From 59f5cf44a38284eb9e76270c786fb6cc62ef8ac4 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Fri, 11 Aug 2017 04:33:57 +0000 Subject: [PATCH 013/279] seccomp: Action to log before allowing Add a new action, SECCOMP_RET_LOG, that logs a syscall before allowing the syscall. At the implementation level, this action is identical to the existing SECCOMP_RET_ALLOW action. However, it can be very useful when initially developing a seccomp filter for an application. The developer can set the default action to be SECCOMP_RET_LOG, maybe mark any obviously needed syscalls with SECCOMP_RET_ALLOW, and then put the application through its paces. A list of syscalls that triggered the default action (SECCOMP_RET_LOG) can be easily gleaned from the logs and that list can be used to build the syscall whitelist. Finally, the developer can change the default action to the desired value. This provides a more friendly experience than seeing the application get killed, then updating the filter and rebuilding the app, seeing the application get killed due to a different syscall, then updating the filter and rebuilding the app, etc. The functionality is similar to what's supported by the various LSMs. SELinux has permissive mode, AppArmor has complain mode, SMACK has bring-up mode, etc. SECCOMP_RET_LOG is given a lower value than SECCOMP_RET_ALLOW as allow while logging is slightly more restrictive than quietly allowing. Unfortunately, the tests added for SECCOMP_RET_LOG are not capable of inspecting the audit log to verify that the syscall was logged. With this patch, the logic for deciding if an action will be logged is: if action == RET_ALLOW: do not log else if action == RET_KILL && RET_KILL in actions_logged: log else if action == RET_LOG && RET_LOG in actions_logged: log else if filter-requests-logging && action in actions_logged: log else if audit_enabled && process-is-being-audited: log else: do not log Signed-off-by: Tyler Hicks Signed-off-by: Kees Cook --- .../userspace-api/seccomp_filter.rst | 9 ++ include/uapi/linux/seccomp.h | 1 + kernel/seccomp.c | 23 ++++- tools/testing/selftests/seccomp/seccomp_bpf.c | 98 ++++++++++++++++++- 4 files changed, 125 insertions(+), 6 deletions(-) diff --git a/Documentation/userspace-api/seccomp_filter.rst b/Documentation/userspace-api/seccomp_filter.rst index 2d1d8ab04ac5..f4977357daf2 100644 --- a/Documentation/userspace-api/seccomp_filter.rst +++ b/Documentation/userspace-api/seccomp_filter.rst @@ -141,6 +141,15 @@ In precedence order, they are: allow use of ptrace, even of other sandboxed processes, without extreme care; ptracers can use this mechanism to escape.) +``SECCOMP_RET_LOG``: + Results in the system call being executed after it is logged. This + should be used by application developers to learn which syscalls their + application needs without having to iterate through multiple test and + development cycles to build the list. + + This action will only be logged if "log" is present in the + actions_logged sysctl string. + ``SECCOMP_RET_ALLOW``: Results in the system call being executed. diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index 19a611d0712e..f94433263e4b 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -31,6 +31,7 @@ #define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ #define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ #define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ +#define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */ #define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ /* Masks for the return value sections. */ diff --git a/kernel/seccomp.c b/kernel/seccomp.c index ed9fde418fc4..59cde2ed3b92 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -533,10 +533,12 @@ static void seccomp_send_sigsys(int syscall, int reason) #define SECCOMP_LOG_TRAP (1 << 2) #define SECCOMP_LOG_ERRNO (1 << 3) #define SECCOMP_LOG_TRACE (1 << 4) -#define SECCOMP_LOG_ALLOW (1 << 5) +#define SECCOMP_LOG_LOG (1 << 5) +#define SECCOMP_LOG_ALLOW (1 << 6) static u32 seccomp_actions_logged = SECCOMP_LOG_KILL | SECCOMP_LOG_TRAP | - SECCOMP_LOG_ERRNO | SECCOMP_LOG_TRACE; + SECCOMP_LOG_ERRNO | SECCOMP_LOG_TRACE | + SECCOMP_LOG_LOG; static inline void seccomp_log(unsigned long syscall, long signr, u32 action, bool requested) @@ -555,15 +557,18 @@ static inline void seccomp_log(unsigned long syscall, long signr, u32 action, case SECCOMP_RET_TRACE: log = requested && seccomp_actions_logged & SECCOMP_LOG_TRACE; break; + case SECCOMP_RET_LOG: + log = seccomp_actions_logged & SECCOMP_LOG_LOG; + break; case SECCOMP_RET_KILL: default: log = seccomp_actions_logged & SECCOMP_LOG_KILL; } /* - * Force an audit message to be emitted when the action is RET_KILL or - * the FILTER_FLAG_LOG bit was set and the action is allowed to be - * logged by the admin. + * Force an audit message to be emitted when the action is RET_KILL, + * RET_LOG, or the FILTER_FLAG_LOG bit was set and the action is + * allowed to be logged by the admin. */ if (log) return __audit_seccomp(syscall, signr, action); @@ -699,6 +704,10 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, return 0; + case SECCOMP_RET_LOG: + seccomp_log(this_syscall, 0, action, true); + return 0; + case SECCOMP_RET_ALLOW: /* * Note that the "match" filter will always be NULL for @@ -873,6 +882,7 @@ static long seccomp_get_action_avail(const char __user *uaction) case SECCOMP_RET_TRAP: case SECCOMP_RET_ERRNO: case SECCOMP_RET_TRACE: + case SECCOMP_RET_LOG: case SECCOMP_RET_ALLOW: break; default: @@ -1023,12 +1033,14 @@ out: #define SECCOMP_RET_TRAP_NAME "trap" #define SECCOMP_RET_ERRNO_NAME "errno" #define SECCOMP_RET_TRACE_NAME "trace" +#define SECCOMP_RET_LOG_NAME "log" #define SECCOMP_RET_ALLOW_NAME "allow" static const char seccomp_actions_avail[] = SECCOMP_RET_KILL_NAME " " SECCOMP_RET_TRAP_NAME " " SECCOMP_RET_ERRNO_NAME " " SECCOMP_RET_TRACE_NAME " " + SECCOMP_RET_LOG_NAME " " SECCOMP_RET_ALLOW_NAME; struct seccomp_log_name { @@ -1041,6 +1053,7 @@ static const struct seccomp_log_name seccomp_log_names[] = { { SECCOMP_LOG_TRAP, SECCOMP_RET_TRAP_NAME }, { SECCOMP_LOG_ERRNO, SECCOMP_RET_ERRNO_NAME }, { SECCOMP_LOG_TRACE, SECCOMP_RET_TRACE_NAME }, + { SECCOMP_LOG_LOG, SECCOMP_RET_LOG_NAME }, { SECCOMP_LOG_ALLOW, SECCOMP_RET_ALLOW_NAME }, { } }; diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 1c8c22ce7740..7372958eccb5 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -74,7 +74,12 @@ #define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ #define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ #define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ +#endif +#ifndef SECCOMP_RET_LOG +#define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */ +#endif +#ifndef SECCOMP_RET_ACTION /* Masks for the return value sections. */ #define SECCOMP_RET_ACTION 0x7fff0000U #define SECCOMP_RET_DATA 0x0000ffffU @@ -342,6 +347,28 @@ TEST(empty_prog) EXPECT_EQ(EINVAL, errno); } +TEST(log_all) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + pid_t parent = getppid(); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); + + /* getppid() should succeed and be logged (no check for logging) */ + EXPECT_EQ(parent, syscall(__NR_getppid)); +} + TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS) { struct sock_filter filter[] = { @@ -756,6 +783,7 @@ TEST_F(TRAP, handler) FIXTURE_DATA(precedence) { struct sock_fprog allow; + struct sock_fprog log; struct sock_fprog trace; struct sock_fprog error; struct sock_fprog trap; @@ -767,6 +795,13 @@ FIXTURE_SETUP(precedence) struct sock_filter allow_insns[] = { BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), }; + struct sock_filter log_insns[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG), + }; struct sock_filter trace_insns[] = { BPF_STMT(BPF_LD|BPF_W|BPF_ABS, offsetof(struct seccomp_data, nr)), @@ -803,6 +838,7 @@ FIXTURE_SETUP(precedence) memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \ self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns) FILTER_ALLOC(allow); + FILTER_ALLOC(log); FILTER_ALLOC(trace); FILTER_ALLOC(error); FILTER_ALLOC(trap); @@ -813,6 +849,7 @@ FIXTURE_TEARDOWN(precedence) { #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter) FILTER_FREE(allow); + FILTER_FREE(log); FILTER_FREE(trace); FILTER_FREE(error); FILTER_FREE(trap); @@ -830,6 +867,8 @@ TEST_F(precedence, allow_ok) ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); @@ -854,6 +893,8 @@ TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS) ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); @@ -885,6 +926,8 @@ TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS) ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); @@ -906,6 +949,8 @@ TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS) ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); @@ -931,6 +976,8 @@ TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS) ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); @@ -952,6 +999,8 @@ TEST_F(precedence, errno_is_third) ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); @@ -970,6 +1019,8 @@ TEST_F(precedence, errno_is_third_in_any_order) ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); @@ -992,6 +1043,8 @@ TEST_F(precedence, trace_is_fourth) ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); ASSERT_EQ(0, ret); /* Should work just fine. */ @@ -1013,12 +1066,54 @@ TEST_F(precedence, trace_is_fourth_in_any_order) ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); /* Should work just fine. */ EXPECT_EQ(parent, syscall(__NR_getppid)); /* No ptracer */ EXPECT_EQ(-1, syscall(__NR_getpid)); } +TEST_F(precedence, log_is_fifth) +{ + pid_t mypid, parent; + long ret; + + mypid = getpid(); + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + EXPECT_EQ(parent, syscall(__NR_getppid)); + /* Should also work just fine */ + EXPECT_EQ(mypid, syscall(__NR_getpid)); +} + +TEST_F(precedence, log_is_fifth_in_any_order) +{ + pid_t mypid, parent; + long ret; + + mypid = getpid(); + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + EXPECT_EQ(parent, syscall(__NR_getppid)); + /* Should also work just fine */ + EXPECT_EQ(mypid, syscall(__NR_getpid)); +} + #ifndef PTRACE_O_TRACESECCOMP #define PTRACE_O_TRACESECCOMP 0x00000080 #endif @@ -2603,7 +2698,7 @@ TEST(get_action_avail) { __u32 actions[] = { SECCOMP_RET_KILL, SECCOMP_RET_TRAP, SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE, - SECCOMP_RET_ALLOW }; + SECCOMP_RET_LOG, SECCOMP_RET_ALLOW }; __u32 unknown_action = 0x10000000U; int i; long ret; @@ -2640,6 +2735,7 @@ TEST(get_action_avail) * - 64-bit arg prodding * - arch value testing (x86 modes especially) * - verify that FILTER_FLAG_LOG filters generate log messages + * - verify that RET_LOG generates log messages * - ... */ From fd76875ca289a3d4722f266fd2d5532a27083903 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 11 Aug 2017 12:53:18 -0700 Subject: [PATCH 014/279] seccomp: Rename SECCOMP_RET_KILL to SECCOMP_RET_KILL_THREAD In preparation for adding SECCOMP_RET_KILL_PROCESS, rename SECCOMP_RET_KILL to the more accurate SECCOMP_RET_KILL_THREAD. The existing selftest values are intentionally left as SECCOMP_RET_KILL just to be sure we're exercising the alias. Signed-off-by: Kees Cook --- Documentation/networking/filter.txt | 2 +- .../userspace-api/seccomp_filter.rst | 4 +- include/uapi/linux/seccomp.h | 3 +- kernel/seccomp.c | 39 ++++++++++--------- samples/seccomp/bpf-direct.c | 4 +- samples/seccomp/bpf-helper.h | 2 +- tools/testing/selftests/seccomp/seccomp_bpf.c | 17 ++++---- 7 files changed, 39 insertions(+), 32 deletions(-) diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index b69b205501de..73aa0f12156d 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -337,7 +337,7 @@ Examples for low-level BPF: jeq #14, good /* __NR_rt_sigprocmask */ jeq #13, good /* __NR_rt_sigaction */ jeq #35, good /* __NR_nanosleep */ - bad: ret #0 /* SECCOMP_RET_KILL */ + bad: ret #0 /* SECCOMP_RET_KILL_THREAD */ good: ret #0x7fff0000 /* SECCOMP_RET_ALLOW */ The above example code can be placed into a file (here called "foo"), and diff --git a/Documentation/userspace-api/seccomp_filter.rst b/Documentation/userspace-api/seccomp_filter.rst index f4977357daf2..d76396f2d8ed 100644 --- a/Documentation/userspace-api/seccomp_filter.rst +++ b/Documentation/userspace-api/seccomp_filter.rst @@ -87,11 +87,11 @@ Return values A seccomp filter may return any of the following values. If multiple filters exist, the return value for the evaluation of a given system call will always use the highest precedent value. (For example, -``SECCOMP_RET_KILL`` will always take precedence.) +``SECCOMP_RET_KILL_THREAD`` will always take precedence.) In precedence order, they are: -``SECCOMP_RET_KILL``: +``SECCOMP_RET_KILL_THREAD``: Results in the task exiting immediately without executing the system call. The exit status of the task (``status & 0x7f``) will be ``SIGSYS``, not ``SIGKILL``. diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index f94433263e4b..5a03f699eb17 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -27,7 +27,8 @@ * The ordering ensures that a min_t() over composed return values always * selects the least permissive choice. */ -#define SECCOMP_RET_KILL 0x00000000U /* kill the task immediately */ +#define SECCOMP_RET_KILL_THREAD 0x00000000U /* kill the thread */ +#define SECCOMP_RET_KILL SECCOMP_RET_KILL_THREAD #define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ #define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ #define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 59cde2ed3b92..95ac54cff00f 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -192,7 +192,7 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd, /* Ensure unexpected behavior doesn't result in failing open. */ if (unlikely(WARN_ON(f == NULL))) - return SECCOMP_RET_KILL; + return SECCOMP_RET_KILL_THREAD; if (!sd) { populate_seccomp_data(&sd_local); @@ -529,15 +529,17 @@ static void seccomp_send_sigsys(int syscall, int reason) #endif /* CONFIG_SECCOMP_FILTER */ /* For use with seccomp_actions_logged */ -#define SECCOMP_LOG_KILL (1 << 0) +#define SECCOMP_LOG_KILL_THREAD (1 << 0) #define SECCOMP_LOG_TRAP (1 << 2) #define SECCOMP_LOG_ERRNO (1 << 3) #define SECCOMP_LOG_TRACE (1 << 4) #define SECCOMP_LOG_LOG (1 << 5) #define SECCOMP_LOG_ALLOW (1 << 6) -static u32 seccomp_actions_logged = SECCOMP_LOG_KILL | SECCOMP_LOG_TRAP | - SECCOMP_LOG_ERRNO | SECCOMP_LOG_TRACE | +static u32 seccomp_actions_logged = SECCOMP_LOG_KILL_THREAD | + SECCOMP_LOG_TRAP | + SECCOMP_LOG_ERRNO | + SECCOMP_LOG_TRACE | SECCOMP_LOG_LOG; static inline void seccomp_log(unsigned long syscall, long signr, u32 action, @@ -560,13 +562,13 @@ static inline void seccomp_log(unsigned long syscall, long signr, u32 action, case SECCOMP_RET_LOG: log = seccomp_actions_logged & SECCOMP_LOG_LOG; break; - case SECCOMP_RET_KILL: + case SECCOMP_RET_KILL_THREAD: default: - log = seccomp_actions_logged & SECCOMP_LOG_KILL; + log = seccomp_actions_logged & SECCOMP_LOG_KILL_THREAD; } /* - * Force an audit message to be emitted when the action is RET_KILL, + * Force an audit message to be emitted when the action is RET_KILL_*, * RET_LOG, or the FILTER_FLAG_LOG bit was set and the action is * allowed to be logged by the admin. */ @@ -605,7 +607,7 @@ static void __secure_computing_strict(int this_syscall) #ifdef SECCOMP_DEBUG dump_stack(); #endif - seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL, true); + seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL_THREAD, true); do_exit(SIGKILL); } @@ -716,7 +718,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, */ return 0; - case SECCOMP_RET_KILL: + case SECCOMP_RET_KILL_THREAD: default: seccomp_log(this_syscall, SIGSYS, action, true); /* Dump core only if this is the last remaining thread. */ @@ -878,7 +880,7 @@ static long seccomp_get_action_avail(const char __user *uaction) return -EFAULT; switch (action) { - case SECCOMP_RET_KILL: + case SECCOMP_RET_KILL_THREAD: case SECCOMP_RET_TRAP: case SECCOMP_RET_ERRNO: case SECCOMP_RET_TRACE: @@ -1029,19 +1031,20 @@ out: #ifdef CONFIG_SYSCTL /* Human readable action names for friendly sysctl interaction */ -#define SECCOMP_RET_KILL_NAME "kill" +#define SECCOMP_RET_KILL_THREAD_NAME "kill_thread" #define SECCOMP_RET_TRAP_NAME "trap" #define SECCOMP_RET_ERRNO_NAME "errno" #define SECCOMP_RET_TRACE_NAME "trace" #define SECCOMP_RET_LOG_NAME "log" #define SECCOMP_RET_ALLOW_NAME "allow" -static const char seccomp_actions_avail[] = SECCOMP_RET_KILL_NAME " " - SECCOMP_RET_TRAP_NAME " " - SECCOMP_RET_ERRNO_NAME " " - SECCOMP_RET_TRACE_NAME " " - SECCOMP_RET_LOG_NAME " " - SECCOMP_RET_ALLOW_NAME; +static const char seccomp_actions_avail[] = + SECCOMP_RET_KILL_THREAD_NAME " " + SECCOMP_RET_TRAP_NAME " " + SECCOMP_RET_ERRNO_NAME " " + SECCOMP_RET_TRACE_NAME " " + SECCOMP_RET_LOG_NAME " " + SECCOMP_RET_ALLOW_NAME; struct seccomp_log_name { u32 log; @@ -1049,7 +1052,7 @@ struct seccomp_log_name { }; static const struct seccomp_log_name seccomp_log_names[] = { - { SECCOMP_LOG_KILL, SECCOMP_RET_KILL_NAME }, + { SECCOMP_LOG_KILL_THREAD, SECCOMP_RET_KILL_THREAD_NAME }, { SECCOMP_LOG_TRAP, SECCOMP_RET_TRAP_NAME }, { SECCOMP_LOG_ERRNO, SECCOMP_RET_ERRNO_NAME }, { SECCOMP_LOG_TRACE, SECCOMP_RET_TRACE_NAME }, diff --git a/samples/seccomp/bpf-direct.c b/samples/seccomp/bpf-direct.c index 151ec3f52189..235ce3c49ee9 100644 --- a/samples/seccomp/bpf-direct.c +++ b/samples/seccomp/bpf-direct.c @@ -129,7 +129,7 @@ static int install_filter(void) /* Check that read is only using stdin. */ BPF_STMT(BPF_LD+BPF_W+BPF_ABS, syscall_arg(0)), BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, STDIN_FILENO, 4, 0), - BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL), + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL_THREAD), /* Check that write is only using stdout */ BPF_STMT(BPF_LD+BPF_W+BPF_ABS, syscall_arg(0)), @@ -139,7 +139,7 @@ static int install_filter(void) BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_TRAP), - BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL), + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL_THREAD), }; struct sock_fprog prog = { .len = (unsigned short)(sizeof(filter)/sizeof(filter[0])), diff --git a/samples/seccomp/bpf-helper.h b/samples/seccomp/bpf-helper.h index 1d8de9edd858..83dbe79cbe2c 100644 --- a/samples/seccomp/bpf-helper.h +++ b/samples/seccomp/bpf-helper.h @@ -44,7 +44,7 @@ void seccomp_bpf_print(struct sock_filter *filter, size_t count); #define ALLOW \ BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW) #define DENY \ - BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL) + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL_THREAD) #define JUMP(labels, label) \ BPF_JUMP(BPF_JMP+BPF_JA, FIND_LABEL((labels), (label)), \ JUMP_JT, JUMP_JF) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 7372958eccb5..a3ba39a32449 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -68,15 +68,18 @@ #define SECCOMP_MODE_FILTER 2 #endif +#ifndef SECCOMP_RET_KILL_THREAD +#define SECCOMP_RET_KILL_THREAD 0x00000000U /* kill the thread */ +#endif #ifndef SECCOMP_RET_KILL -#define SECCOMP_RET_KILL 0x00000000U /* kill the task immediately */ -#define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ -#define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ -#define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ -#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ +#define SECCOMP_RET_KILL SECCOMP_RET_KILL_THREAD +#define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ +#define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ +#define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ +#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ #endif #ifndef SECCOMP_RET_LOG -#define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */ +#define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */ #endif #ifndef SECCOMP_RET_ACTION @@ -2696,7 +2699,7 @@ TEST_SIGNAL(filter_flag_log, SIGSYS) TEST(get_action_avail) { - __u32 actions[] = { SECCOMP_RET_KILL, SECCOMP_RET_TRAP, + __u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP, SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE, SECCOMP_RET_LOG, SECCOMP_RET_ALLOW }; __u32 unknown_action = 0x10000000U; From 4d3b0b05aae9ee9ce0970dc4cc0fb3fad5e85945 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 11 Aug 2017 13:01:39 -0700 Subject: [PATCH 015/279] seccomp: Introduce SECCOMP_RET_KILL_PROCESS This introduces the BPF return value for SECCOMP_RET_KILL_PROCESS to kill an entire process. This cannot yet be reached by seccomp, but it changes the default-kill behavior (for unknown return values) from kill-thread to kill-process. Signed-off-by: Kees Cook --- include/uapi/linux/seccomp.h | 18 ++++++++++-------- kernel/seccomp.c | 22 ++++++++++++++++------ 2 files changed, 26 insertions(+), 14 deletions(-) diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index 5a03f699eb17..7e77c92df78a 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -22,18 +22,20 @@ /* * All BPF programs must return a 32-bit value. * The bottom 16-bits are for optional return data. - * The upper 16-bits are ordered from least permissive values to most. + * The upper 16-bits are ordered from least permissive values to most, + * as a signed value (so 0x8000000 is negative). * * The ordering ensures that a min_t() over composed return values always * selects the least permissive choice. */ -#define SECCOMP_RET_KILL_THREAD 0x00000000U /* kill the thread */ -#define SECCOMP_RET_KILL SECCOMP_RET_KILL_THREAD -#define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ -#define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ -#define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ -#define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */ -#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ +#define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */ +#define SECCOMP_RET_KILL_THREAD 0x00000000U /* kill the thread */ +#define SECCOMP_RET_KILL SECCOMP_RET_KILL_THREAD +#define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ +#define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ +#define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ +#define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */ +#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ /* Masks for the return value sections. */ #define SECCOMP_RET_ACTION 0x7fff0000U diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 95ac54cff00f..5c7299b9d953 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -192,7 +192,7 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd, /* Ensure unexpected behavior doesn't result in failing open. */ if (unlikely(WARN_ON(f == NULL))) - return SECCOMP_RET_KILL_THREAD; + return SECCOMP_RET_KILL_PROCESS; if (!sd) { populate_seccomp_data(&sd_local); @@ -529,14 +529,16 @@ static void seccomp_send_sigsys(int syscall, int reason) #endif /* CONFIG_SECCOMP_FILTER */ /* For use with seccomp_actions_logged */ -#define SECCOMP_LOG_KILL_THREAD (1 << 0) +#define SECCOMP_LOG_KILL_PROCESS (1 << 0) +#define SECCOMP_LOG_KILL_THREAD (1 << 1) #define SECCOMP_LOG_TRAP (1 << 2) #define SECCOMP_LOG_ERRNO (1 << 3) #define SECCOMP_LOG_TRACE (1 << 4) #define SECCOMP_LOG_LOG (1 << 5) #define SECCOMP_LOG_ALLOW (1 << 6) -static u32 seccomp_actions_logged = SECCOMP_LOG_KILL_THREAD | +static u32 seccomp_actions_logged = SECCOMP_LOG_KILL_PROCESS | + SECCOMP_LOG_KILL_THREAD | SECCOMP_LOG_TRAP | SECCOMP_LOG_ERRNO | SECCOMP_LOG_TRACE | @@ -563,8 +565,11 @@ static inline void seccomp_log(unsigned long syscall, long signr, u32 action, log = seccomp_actions_logged & SECCOMP_LOG_LOG; break; case SECCOMP_RET_KILL_THREAD: - default: log = seccomp_actions_logged & SECCOMP_LOG_KILL_THREAD; + break; + case SECCOMP_RET_KILL_PROCESS: + default: + log = seccomp_actions_logged & SECCOMP_LOG_KILL_PROCESS; } /* @@ -719,10 +724,12 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, return 0; case SECCOMP_RET_KILL_THREAD: + case SECCOMP_RET_KILL_PROCESS: default: seccomp_log(this_syscall, SIGSYS, action, true); /* Dump core only if this is the last remaining thread. */ - if (get_nr_threads(current) == 1) { + if (action == SECCOMP_RET_KILL_PROCESS || + get_nr_threads(current) == 1) { siginfo_t info; /* Show the original registers in the dump. */ @@ -731,7 +738,10 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, seccomp_init_siginfo(&info, this_syscall, data); do_coredump(&info); } - do_exit(SIGSYS); + if (action == SECCOMP_RET_KILL_PROCESS) + do_group_exit(SIGSYS); + else + do_exit(SIGSYS); } unreachable(); From 0466bdb99e8744bc9befa8d62a317f0fd7fd7421 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 11 Aug 2017 13:12:11 -0700 Subject: [PATCH 016/279] seccomp: Implement SECCOMP_RET_KILL_PROCESS action MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Right now, SECCOMP_RET_KILL_THREAD (neé SECCOMP_RET_KILL) kills the current thread. There have been a few requests for this to kill the entire process (the thread group). This cannot be just changed (discovered when adding coredump support since coredumping kills the entire process) because there are userspace programs depending on the thread-kill behavior. Instead, implement SECCOMP_RET_KILL_PROCESS, which is 0x80000000, and can be processed as "-1" by the kernel, below the existing RET_KILL that is ABI-set to "0". For userspace, SECCOMP_RET_ACTION_FULL is added to expand the mask to the signed bit. Old userspace using the SECCOMP_RET_ACTION mask will see SECCOMP_RET_KILL_PROCESS as 0 still, but this would only be visible when examining the siginfo in a core dump from a RET_KILL_*, where it will think it was thread-killed instead of process-killed. Attempts to introduce this behavior via other ways (filter flags, seccomp struct flags, masked RET_DATA bits) all come with weird side-effects and baggage. This change preserves the central behavioral expectations of the seccomp filter engine without putting too great a burden on changes needed in userspace to use the new action. The new action is discoverable by userspace through either the new actions_avail sysctl or through the SECCOMP_GET_ACTION_AVAIL seccomp operation. If used without checking for availability, old kernels will treat RET_KILL_PROCESS as RET_KILL_THREAD (since the old mask will produce RET_KILL_THREAD). Cc: Paul Moore Cc: Fabricio Voznika Signed-off-by: Kees Cook --- Documentation/userspace-api/seccomp_filter.rst | 7 ++++++- include/uapi/linux/seccomp.h | 1 + kernel/seccomp.c | 9 +++++++-- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/Documentation/userspace-api/seccomp_filter.rst b/Documentation/userspace-api/seccomp_filter.rst index d76396f2d8ed..099c412951d6 100644 --- a/Documentation/userspace-api/seccomp_filter.rst +++ b/Documentation/userspace-api/seccomp_filter.rst @@ -87,10 +87,15 @@ Return values A seccomp filter may return any of the following values. If multiple filters exist, the return value for the evaluation of a given system call will always use the highest precedent value. (For example, -``SECCOMP_RET_KILL_THREAD`` will always take precedence.) +``SECCOMP_RET_KILL_PROCESS`` will always take precedence.) In precedence order, they are: +``SECCOMP_RET_KILL_PROCESS``: + Results in the entire process exiting immediately without executing + the system call. The exit status of the task (``status & 0x7f``) + will be ``SIGSYS``, not ``SIGKILL``. + ``SECCOMP_RET_KILL_THREAD``: Results in the task exiting immediately without executing the system call. The exit status of the task (``status & 0x7f``) will diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index 7e77c92df78a..f6bc1dea3247 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -38,6 +38,7 @@ #define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ /* Masks for the return value sections. */ +#define SECCOMP_RET_ACTION_FULL 0xffff0000U #define SECCOMP_RET_ACTION 0x7fff0000U #define SECCOMP_RET_DATA 0x0000ffffU diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 5c7299b9d953..c24579dfa7a1 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -181,6 +181,7 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen) * * Returns valid seccomp BPF response codes. */ +#define ACTION_ONLY(ret) ((s32)((ret) & (SECCOMP_RET_ACTION_FULL))) static u32 seccomp_run_filters(const struct seccomp_data *sd, struct seccomp_filter **match) { @@ -206,7 +207,7 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd, for (; f; f = f->prev) { u32 cur_ret = BPF_PROG_RUN(f->prog, sd); - if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION)) { + if (ACTION_ONLY(cur_ret) < ACTION_ONLY(ret)) { ret = cur_ret; *match = f; } @@ -650,7 +651,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, filter_ret = seccomp_run_filters(sd, &match); data = filter_ret & SECCOMP_RET_DATA; - action = filter_ret & SECCOMP_RET_ACTION; + action = filter_ret & SECCOMP_RET_ACTION_FULL; switch (action) { case SECCOMP_RET_ERRNO: @@ -890,6 +891,7 @@ static long seccomp_get_action_avail(const char __user *uaction) return -EFAULT; switch (action) { + case SECCOMP_RET_KILL_PROCESS: case SECCOMP_RET_KILL_THREAD: case SECCOMP_RET_TRAP: case SECCOMP_RET_ERRNO: @@ -1041,6 +1043,7 @@ out: #ifdef CONFIG_SYSCTL /* Human readable action names for friendly sysctl interaction */ +#define SECCOMP_RET_KILL_PROCESS_NAME "kill_process" #define SECCOMP_RET_KILL_THREAD_NAME "kill_thread" #define SECCOMP_RET_TRAP_NAME "trap" #define SECCOMP_RET_ERRNO_NAME "errno" @@ -1049,6 +1052,7 @@ out: #define SECCOMP_RET_ALLOW_NAME "allow" static const char seccomp_actions_avail[] = + SECCOMP_RET_KILL_PROCESS_NAME " " SECCOMP_RET_KILL_THREAD_NAME " " SECCOMP_RET_TRAP_NAME " " SECCOMP_RET_ERRNO_NAME " " @@ -1062,6 +1066,7 @@ struct seccomp_log_name { }; static const struct seccomp_log_name seccomp_log_names[] = { + { SECCOMP_LOG_KILL_PROCESS, SECCOMP_RET_KILL_PROCESS_NAME }, { SECCOMP_LOG_KILL_THREAD, SECCOMP_RET_KILL_THREAD_NAME }, { SECCOMP_LOG_TRAP, SECCOMP_RET_TRAP_NAME }, { SECCOMP_LOG_ERRNO, SECCOMP_RET_ERRNO_NAME }, From f3e1821d9e1cc3fb434d7763001791dcd6720c90 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 11 Aug 2017 13:20:33 -0700 Subject: [PATCH 017/279] selftests/seccomp: Test thread vs process killing This verifies that SECCOMP_RET_KILL_PROCESS is higher priority than SECCOMP_RET_KILL_THREAD. (This also moves a bunch of defines up earlier in the file to use them earlier.) Signed-off-by: Kees Cook Reviewed-by: Tyler Hicks --- tools/testing/selftests/seccomp/seccomp_bpf.c | 228 +++++++++++++----- 1 file changed, 168 insertions(+), 60 deletions(-) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index a3ba39a32449..0683cd543cd5 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -68,7 +68,17 @@ #define SECCOMP_MODE_FILTER 2 #endif -#ifndef SECCOMP_RET_KILL_THREAD +#ifndef SECCOMP_RET_ALLOW +struct seccomp_data { + int nr; + __u32 arch; + __u64 instruction_pointer; + __u64 args[6]; +}; +#endif + +#ifndef SECCOMP_RET_KILL_PROCESS +#define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */ #define SECCOMP_RET_KILL_THREAD 0x00000000U /* kill the thread */ #endif #ifndef SECCOMP_RET_KILL @@ -82,17 +92,53 @@ #define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */ #endif -#ifndef SECCOMP_RET_ACTION -/* Masks for the return value sections. */ -#define SECCOMP_RET_ACTION 0x7fff0000U -#define SECCOMP_RET_DATA 0x0000ffffU +#ifndef __NR_seccomp +# if defined(__i386__) +# define __NR_seccomp 354 +# elif defined(__x86_64__) +# define __NR_seccomp 317 +# elif defined(__arm__) +# define __NR_seccomp 383 +# elif defined(__aarch64__) +# define __NR_seccomp 277 +# elif defined(__hppa__) +# define __NR_seccomp 338 +# elif defined(__powerpc__) +# define __NR_seccomp 358 +# elif defined(__s390__) +# define __NR_seccomp 348 +# else +# warning "seccomp syscall number unknown for this architecture" +# define __NR_seccomp 0xffff +# endif +#endif -struct seccomp_data { - int nr; - __u32 arch; - __u64 instruction_pointer; - __u64 args[6]; -}; +#ifndef SECCOMP_SET_MODE_STRICT +#define SECCOMP_SET_MODE_STRICT 0 +#endif + +#ifndef SECCOMP_SET_MODE_FILTER +#define SECCOMP_SET_MODE_FILTER 1 +#endif + +#ifndef SECCOMP_GET_ACTION_AVAIL +#define SECCOMP_GET_ACTION_AVAIL 2 +#endif + +#ifndef SECCOMP_FILTER_FLAG_TSYNC +#define SECCOMP_FILTER_FLAG_TSYNC 1 +#endif + +#ifndef SECCOMP_FILTER_FLAG_LOG +#define SECCOMP_FILTER_FLAG_LOG 2 +#endif + +#ifndef seccomp +int seccomp(unsigned int op, unsigned int flags, void *args) +{ + errno = 0; + return syscall(__NR_seccomp, op, flags, args); +} #endif #if __BYTE_ORDER == __LITTLE_ENDIAN @@ -550,6 +596,117 @@ TEST_SIGNAL(KILL_one_arg_six, SIGSYS) close(fd); } +/* This is a thread task to die via seccomp filter violation. */ +void *kill_thread(void *data) +{ + bool die = (bool)data; + + if (die) { + prctl(PR_GET_SECCOMP, 0, 0, 0, 0); + return (void *)SIBLING_EXIT_FAILURE; + } + + return (void *)SIBLING_EXIT_UNKILLED; +} + +/* Prepare a thread that will kill itself or both of us. */ +void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process) +{ + pthread_t thread; + void *status; + /* Kill only when calling __NR_prctl. */ + struct sock_filter filter_thread[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog_thread = { + .len = (unsigned short)ARRAY_SIZE(filter_thread), + .filter = filter_thread, + }; + struct sock_filter filter_process[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog_process = { + .len = (unsigned short)ARRAY_SIZE(filter_process), + .filter = filter_process, + }; + + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, + kill_process ? &prog_process : &prog_thread)); + + /* + * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS + * flag cannot be downgraded by a new filter. + */ + ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread)); + + /* Start a thread that will exit immediately. */ + ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false)); + ASSERT_EQ(0, pthread_join(thread, &status)); + ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsigned long)status); + + /* Start a thread that will die immediately. */ + ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true)); + ASSERT_EQ(0, pthread_join(thread, &status)); + ASSERT_NE(SIBLING_EXIT_FAILURE, (unsigned long)status); + + /* + * If we get here, only the spawned thread died. Let the parent know + * the whole process didn't die (i.e. this thread, the spawner, + * stayed running). + */ + exit(42); +} + +TEST(KILL_thread) +{ + int status; + pid_t child_pid; + + child_pid = fork(); + ASSERT_LE(0, child_pid); + if (child_pid == 0) { + kill_thread_or_group(_metadata, false); + _exit(38); + } + + ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); + + /* If only the thread was killed, we'll see exit 42. */ + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(42, WEXITSTATUS(status)); +} + +TEST(KILL_process) +{ + int status; + pid_t child_pid; + + child_pid = fork(); + ASSERT_LE(0, child_pid); + if (child_pid == 0) { + kill_thread_or_group(_metadata, true); + _exit(38); + } + + ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); + + /* If the entire process was killed, we'll see SIGSYS. */ + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_EQ(SIGSYS, WTERMSIG(status)); +} + /* TODO(wad) add 64-bit versus 32-bit arg tests. */ TEST(arg_out_of_range) { @@ -1800,55 +1957,6 @@ TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS) EXPECT_NE(self->mypid, syscall(__NR_getpid)); } -#ifndef __NR_seccomp -# if defined(__i386__) -# define __NR_seccomp 354 -# elif defined(__x86_64__) -# define __NR_seccomp 317 -# elif defined(__arm__) -# define __NR_seccomp 383 -# elif defined(__aarch64__) -# define __NR_seccomp 277 -# elif defined(__hppa__) -# define __NR_seccomp 338 -# elif defined(__powerpc__) -# define __NR_seccomp 358 -# elif defined(__s390__) -# define __NR_seccomp 348 -# else -# warning "seccomp syscall number unknown for this architecture" -# define __NR_seccomp 0xffff -# endif -#endif - -#ifndef SECCOMP_SET_MODE_STRICT -#define SECCOMP_SET_MODE_STRICT 0 -#endif - -#ifndef SECCOMP_SET_MODE_FILTER -#define SECCOMP_SET_MODE_FILTER 1 -#endif - -#ifndef SECCOMP_GET_ACTION_AVAIL -#define SECCOMP_GET_ACTION_AVAIL 2 -#endif - -#ifndef SECCOMP_FILTER_FLAG_TSYNC -#define SECCOMP_FILTER_FLAG_TSYNC 1 -#endif - -#ifndef SECCOMP_FILTER_FLAG_LOG -#define SECCOMP_FILTER_FLAG_LOG 2 -#endif - -#ifndef seccomp -int seccomp(unsigned int op, unsigned int flags, void *args) -{ - errno = 0; - return syscall(__NR_seccomp, op, flags, args); -} -#endif - TEST(seccomp_syscall) { struct sock_filter filter[] = { From 6849243bf4c6155151b294e9f0e0dc9540d6f083 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 16 Aug 2017 20:26:57 -0700 Subject: [PATCH 018/279] samples: Unrename SECCOMP_RET_KILL Since samples can still be built before header installs, avoid the cosmetic renaming of SECCOMP_RET_KILL to avoid build failures in -next. Cc: Stephen Rothwell Signed-off-by: Kees Cook --- samples/seccomp/bpf-direct.c | 4 ++-- samples/seccomp/bpf-helper.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/samples/seccomp/bpf-direct.c b/samples/seccomp/bpf-direct.c index 235ce3c49ee9..151ec3f52189 100644 --- a/samples/seccomp/bpf-direct.c +++ b/samples/seccomp/bpf-direct.c @@ -129,7 +129,7 @@ static int install_filter(void) /* Check that read is only using stdin. */ BPF_STMT(BPF_LD+BPF_W+BPF_ABS, syscall_arg(0)), BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, STDIN_FILENO, 4, 0), - BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL_THREAD), + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL), /* Check that write is only using stdout */ BPF_STMT(BPF_LD+BPF_W+BPF_ABS, syscall_arg(0)), @@ -139,7 +139,7 @@ static int install_filter(void) BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_TRAP), - BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL_THREAD), + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL), }; struct sock_fprog prog = { .len = (unsigned short)(sizeof(filter)/sizeof(filter[0])), diff --git a/samples/seccomp/bpf-helper.h b/samples/seccomp/bpf-helper.h index 83dbe79cbe2c..1d8de9edd858 100644 --- a/samples/seccomp/bpf-helper.h +++ b/samples/seccomp/bpf-helper.h @@ -44,7 +44,7 @@ void seccomp_bpf_print(struct sock_filter *filter, size_t count); #define ALLOW \ BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW) #define DENY \ - BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL_THREAD) + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL) #define JUMP(labels, label) \ BPF_JUMP(BPF_JMP+BPF_JA, FIND_LABEL((labels), (label)), \ JUMP_JT, JUMP_JF) From a4e89ffb59235fd11d27107dea3efa4562ac0a12 Mon Sep 17 00:00:00 2001 From: Matt Weber Date: Wed, 28 Jun 2017 11:14:29 -0500 Subject: [PATCH 019/279] powerpc/e6500: Update machine check for L1D cache err This patch updates the machine check handler of Linux kernel to handle the e6500 architecture case. In e6500 core, L1 Data Cache Write Shadow Mode (DCWS) register is not implemented but L1 data cache always runs in write shadow mode. So, on L1 data cache parity errors, hardware will automatically invalidate the data cache but will still log a machine check interrupt. Signed-off-by: Ronak Desai Signed-off-by: Matthew Weber Signed-off-by: Scott Wood --- arch/powerpc/kernel/traps.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 675d5d2bfcde..410352acfa38 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -393,6 +393,7 @@ static inline int check_io_access(struct pt_regs *regs) int machine_check_e500mc(struct pt_regs *regs) { unsigned long mcsr = mfspr(SPRN_MCSR); + unsigned long pvr = mfspr(SPRN_PVR); unsigned long reason = mcsr; int recoverable = 1; @@ -434,8 +435,15 @@ int machine_check_e500mc(struct pt_regs *regs) * may still get logged and cause a machine check. We should * only treat the non-write shadow case as non-recoverable. */ - if (!(mfspr(SPRN_L1CSR2) & L1CSR2_DCWS)) - recoverable = 0; + /* On e6500 core, L1 DCWS (Data cache write shadow mode) bit + * is not implemented but L1 data cache always runs in write + * shadow mode. Hence on data cache parity errors HW will + * automatically invalidate the L1 Data Cache. + */ + if (PVR_VER(pvr) != PVR_VER_E6500) { + if (!(mfspr(SPRN_L1CSR2) & L1CSR2_DCWS)) + recoverable = 0; + } } if (reason & MCSR_L2MMU_MHIT) { From 50dad5fb59d12dc9a5a0e96073ee1e5857ac45a2 Mon Sep 17 00:00:00 2001 From: Himanshu Jha Date: Wed, 30 Aug 2017 00:33:35 +0530 Subject: [PATCH 020/279] drm/amdkfd: remove memset before memcpy calling memcpy immediately after memset with the same region of memory makes memset redundant. Signed-off-by: Himanshu Jha Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 1cae95e2b13a..03bec765b03d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -143,7 +143,6 @@ int pqm_create_queue(struct process_queue_manager *pqm, int num_queues = 0; struct queue *cur; - memset(&q_properties, 0, sizeof(struct queue_properties)); memcpy(&q_properties, properties, sizeof(struct queue_properties)); q = NULL; kq = NULL; From 1fabbf781167052f4480bdcc627378a27e78a559 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sat, 2 Sep 2017 15:00:25 +0300 Subject: [PATCH 021/279] drm/amdkfd: pass queue's mqd when destroying mqd In VI, the destroy mqd function needs to inquire fields present in the mqd structure. That's why we need to pass it to that function instead of NULL. Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 681b639f5133..0649dd43e780 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -183,7 +183,7 @@ static void uninitialize(struct kernel_queue *kq) { if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ) kq->mqd->destroy_mqd(kq->mqd, - NULL, + kq->queue->mqd, false, QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS, kq->queue->pipe, From 3664847d95e60a9a943858b7800f8484669740fc Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Fri, 25 Aug 2017 10:40:02 -0700 Subject: [PATCH 022/279] md/raid5: fix a race condition in stripe batch We have a race condition in below scenario, say have 3 continuous stripes, sh1, sh2 and sh3, sh1 is the stripe_head of sh2 and sh3: CPU1 CPU2 CPU3 handle_stripe(sh3) stripe_add_to_batch_list(sh3) -> lock(sh2, sh3) -> lock batch_lock(sh1) -> add sh3 to batch_list of sh1 -> unlock batch_lock(sh1) clear_batch_ready(sh1) -> lock(sh1) and batch_lock(sh1) -> clear STRIPE_BATCH_READY for all stripes in batch_list -> unlock(sh1) and batch_lock(sh1) ->clear_batch_ready(sh3) -->test_and_clear_bit(STRIPE_BATCH_READY, sh3) --->return 0 as sh->batch == NULL -> sh3->batch_head = sh1 -> unlock (sh2, sh3) In CPU1, handle_stripe will continue handle sh3 even it's in batch stripe list of sh1. By moving sh3->batch_head assignment in to batch_lock, we make it impossible to clear STRIPE_BATCH_READY before batch_head is set. Thanks Stephane for helping debug this tricky issue. Reported-and-tested-by: Stephane Thiell Cc: stable@vger.kernel.org (v4.1+) Signed-off-by: Shaohua Li --- drivers/md/raid5.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 049a958d3c1e..90414a4e0d49 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -811,6 +811,14 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh spin_unlock(&head->batch_head->batch_lock); goto unlock_out; } + /* + * We must assign batch_head of this stripe within the + * batch_lock, otherwise clear_batch_ready of batch head + * stripe could clear BATCH_READY bit of this stripe and + * this stripe->batch_head doesn't get assigned, which + * could confuse clear_batch_ready for this stripe + */ + sh->batch_head = head->batch_head; /* * at this point, head's BATCH_READY could be cleared, but we @@ -818,8 +826,6 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh */ list_add(&sh->batch_list, &head->batch_list); spin_unlock(&head->batch_head->batch_lock); - - sh->batch_head = head->batch_head; } else { head->batch_head = head; sh->batch_head = head->batch_head; From 184a09eb9a2fe425e49c9538f1604b05ed33cfef Mon Sep 17 00:00:00 2001 From: Dennis Yang Date: Wed, 6 Sep 2017 11:02:35 +0800 Subject: [PATCH 023/279] md/raid5: preserve STRIPE_ON_UNPLUG_LIST in break_stripe_batch_list In release_stripe_plug(), if a stripe_head has its STRIPE_ON_UNPLUG_LIST set, it indicates that this stripe_head is already in the raid5_plug_cb list and release_stripe() would be called instead to drop a reference count. Otherwise, the STRIPE_ON_UNPLUG_LIST bit would be set for this stripe_head and it will get queued into the raid5_plug_cb list. Since break_stripe_batch_list() did not preserve STRIPE_ON_UNPLUG_LIST, A stripe could be re-added to plug list while it is still on that list in the following situation. If stripe_head A is added to another stripe_head B's batch list, in this case A will have its batch_head != NULL and be added into the plug list. After that, stripe_head B gets handled and called break_stripe_batch_list() to reset all the batched stripe_head(including A which is still on the plug list)'s state and reset their batch_head to NULL. Before the plug list gets processed, if there is another write request comes in and get stripe_head A, A will have its batch_head == NULL (cleared by calling break_stripe_batch_list() on B) and be added to plug list once again. Signed-off-by: Dennis Yang Cc: stable@vger.kernel.org (v4.1+) Signed-off-by: Shaohua Li --- drivers/md/raid5.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 90414a4e0d49..34c01e83395a 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4605,7 +4605,8 @@ static void break_stripe_batch_list(struct stripe_head *head_sh, set_mask_bits(&sh->state, ~(STRIPE_EXPAND_SYNC_FLAGS | (1 << STRIPE_PREREAD_ACTIVE) | - (1 << STRIPE_DEGRADED)), + (1 << STRIPE_DEGRADED) | + (1 << STRIPE_ON_UNPLUG_LIST)), head_sh->state & (1 << STRIPE_INSYNC)); sh->check_state = head_sh->check_state; From 01f5bbd17a8066b58dba9b5049fad504bce67322 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 7 Sep 2017 10:40:35 +0300 Subject: [PATCH 024/279] mmc: block: Fix incorrectly initialized requests mmc_init_request() depends on card->bouncesz so it must be calculated before blk_init_allocated_queue() starts allocating requests. Reported-by: Seraphime Kirkovski Fixes: 304419d8a7e9 ("mmc: core: Allocate per-request data using the..") Signed-off-by: Adrian Hunter Tested-by: Seraphime Kirkovski Cc: Signed-off-by: Ulf Hansson Tested-by: Pavel Machek --- drivers/mmc/core/queue.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index affa7370ba82..74c663b1c0a7 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -242,6 +242,12 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, if (mmc_dev(host)->dma_mask && *mmc_dev(host)->dma_mask) limit = (u64)dma_max_pfn(mmc_dev(host)) << PAGE_SHIFT; + /* + * mmc_init_request() depends on card->bouncesz so it must be calculated + * before blk_init_allocated_queue() starts allocating requests. + */ + card->bouncesz = mmc_queue_calc_bouncesz(host); + mq->card = card; mq->queue = blk_alloc_queue(GFP_KERNEL); if (!mq->queue) @@ -265,7 +271,6 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, if (mmc_can_erase(card)) mmc_queue_setup_discard(mq->queue, card); - card->bouncesz = mmc_queue_calc_bouncesz(host); if (card->bouncesz) { blk_queue_max_hw_sectors(mq->queue, card->bouncesz / 512); blk_queue_max_segments(mq->queue, card->bouncesz / 512); From b4f146f5faf85d21f5cd414531c3ced9c1b61e3c Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 5 Sep 2017 20:27:50 +0200 Subject: [PATCH 025/279] mmc: host: fix typo after MMC_DEBUG move MMC_DEBUG was moved and one letter got strangely capitalized. Signed-off-by: Wolfram Sang Signed-off-by: Ulf Hansson --- drivers/mmc/host/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index 02179ed2a40d..8c15637178ff 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -5,7 +5,7 @@ comment "MMC/SD/SDIO Host Controller Drivers" config MMC_DEBUG - bool "MMC host drivers debugginG" + bool "MMC host drivers debugging" depends on MMC != n help This is an option for use by developers; most people should From b917c6d18c031cfce11ec35139033845f205b1d0 Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Thu, 7 Sep 2017 13:24:17 +0200 Subject: [PATCH 026/279] mmc: cavium: Fix use-after-free in of_platform_device_destroy KASAN reported the following: [ 19.338655] ================================================================== [ 19.345946] BUG: KASAN: use-after-free in of_platform_device_destroy+0x88/0x100 [ 19.345966] Read of size 8 at addr fffffe01aa6f1468 by task systemd-udevd/264 [ 19.345983] CPU: 1 PID: 264 Comm: systemd-udevd Not tainted 4.13.0-jang+ #737 [ 19.345989] Hardware name: Cavium ThunderX CN81XX board (DT) [ 19.345995] Call trace: [ 19.346013] [] dump_backtrace+0x0/0x368 [ 19.346026] [] show_stack+0x24/0x30 [ 19.346040] [] dump_stack+0xa4/0xc8 [ 19.346057] [] print_address_description+0x68/0x258 [ 19.346070] [] kasan_report+0x238/0x2f8 [ 19.346082] [] __asan_load8+0x88/0xb8 [ 19.346098] [] of_platform_device_destroy+0x88/0x100 [ 19.346131] [] thunder_mmc_probe+0x314/0x550 [thunderx_mmc] [ 19.346147] [] pci_device_probe+0x158/0x1f8 [ 19.346162] [] driver_probe_device+0x394/0x5f8 [ 19.346174] [] __driver_attach+0x154/0x158 [ 19.346185] [] bus_for_each_dev+0xdc/0x140 [ 19.346196] [] driver_attach+0x38/0x48 [ 19.346207] [] bus_add_driver+0x290/0x3c8 [ 19.346219] [] driver_register+0xbc/0x1a0 [ 19.346232] [] __pci_register_driver+0xc4/0xd8 [ 19.346260] [] thunder_mmc_driver_init+0x24/0x10000 [thunderx_mmc] [ 19.346273] [] do_one_initcall+0x98/0x1c0 [ 19.346289] [] do_init_module+0xe0/0x2cc [ 19.346303] [] load_module+0x3238/0x35c0 [ 19.346318] [] SyS_finit_module+0x190/0x1a0 [ 19.346329] [] __sys_trace_return+0x0/0x4 This is caused by: platform_device_register() -> platform_device_unregister(to_platform_device(dev)) freeing struct device -> of_node_clear_flag(dev->of_node, ...) writing to the freed device The issue is solved by increasing the reference count before calling of_platform_device_destroy() so freeing the device is postponed after the call. Fixes: 8fb83b142823 ("mmc: cavium: Fix probing race with regulator") Signed-off-by: Jan Glauber Signed-off-by: Ulf Hansson --- drivers/mmc/host/cavium-thunderx.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/cavium-thunderx.c b/drivers/mmc/host/cavium-thunderx.c index b9cc95998799..eee08d81b242 100644 --- a/drivers/mmc/host/cavium-thunderx.c +++ b/drivers/mmc/host/cavium-thunderx.c @@ -7,6 +7,7 @@ * * Copyright (C) 2016 Cavium Inc. */ +#include #include #include #include @@ -149,8 +150,11 @@ error: for (i = 0; i < CAVIUM_MAX_MMC; i++) { if (host->slot[i]) cvm_mmc_of_slot_remove(host->slot[i]); - if (host->slot_pdev[i]) + if (host->slot_pdev[i]) { + get_device(&host->slot_pdev[i]->dev); of_platform_device_destroy(&host->slot_pdev[i]->dev, NULL); + put_device(&host->slot_pdev[i]->dev); + } } clk_disable_unprepare(host->clk); return ret; From bf2afee14e07de16d3cafc67edbfc2a3cc65e4bc Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Fri, 8 Sep 2017 10:37:35 +1000 Subject: [PATCH 027/279] cifs: check rsp for NULL before dereferencing in SMB2_open In SMB2_open there are several paths where the SendReceive2 call will return an error before it sets rsp_iov.iov_base thus leaving iov_base uninitialized. Thus we need to check rsp before we dereference it in the call to get_rfc1002_length(). A report of this issue was previously reported in http://www.spinics.net/lists/linux-cifs/msg12846.html RH-bugzilla : 1476151 Version 2 : * Lets properly initialize rsp_iov before we use it. Signed-off-by: Ronnie Sahlberg Reviewed-by: Pavel Shilovsky . Signed-off-by: Steve French Reported-by: Xiaoli Feng CC: Stable --- fs/cifs/smb2pdu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 5531e7ee1210..69a751b038ab 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1634,7 +1634,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, struct cifs_tcon *tcon = oparms->tcon; struct cifs_ses *ses = tcon->ses; struct kvec iov[4]; - struct kvec rsp_iov; + struct kvec rsp_iov = {NULL, 0}; int resp_buftype; int uni_path_len; __le16 *copy_path = NULL; @@ -1763,7 +1763,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, if (rc != 0) { cifs_stats_fail_inc(tcon, SMB2_CREATE_HE); - if (err_buf) + if (err_buf && rsp) *err_buf = kmemdup(rsp, get_rfc1002_length(rsp) + 4, GFP_KERNEL); goto creat_exit; From fc3100d64f0ae383ae8d845989103da06d62763b Mon Sep 17 00:00:00 2001 From: Pu Hou Date: Tue, 5 Sep 2017 05:17:24 +0200 Subject: [PATCH 028/279] s390/perf: fix bug when creating per-thread event A per-thread event could not be created correctly like below: perf record --per-thread -e rB0000 -- sleep 1 Error: The sys_perf_event_open() syscall returned with 19 (No such device) for event (rB0000). /bin/dmesg may provide additional information. No CONFIG_PERF_EVENTS=y kernel support configured? This bug was introduced by: commit c311c797998c1e70eade463dd60b843da4f1a203 Author: Alexey Dobriyan Date: Mon May 8 15:56:15 2017 -0700 cpumask: make "nr_cpumask_bits" unsigned If a per-thread event is not attached to any CPU, the cpu field in struct perf_event is -1. The above commit converts the CPU number to unsigned int, which result in an illegal CPU number. Fixes: c311c797998c ("cpumask: make "nr_cpumask_bits" unsigned") Cc: # v4.12+ Cc: Alexey Dobriyan Acked-by: Heiko Carstens Signed-off-by: Pu Hou Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/perf_cpum_sf.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index c1bf75ffb875..7e1e40323b78 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -823,9 +823,12 @@ static int cpumsf_pmu_event_init(struct perf_event *event) } /* Check online status of the CPU to which the event is pinned */ - if ((unsigned int)event->cpu >= nr_cpumask_bits || - (event->cpu >= 0 && !cpu_online(event->cpu))) - return -ENODEV; + if (event->cpu >= 0) { + if ((unsigned int)event->cpu >= nr_cpumask_bits) + return -ENODEV; + if (!cpu_online(event->cpu)) + return -ENODEV; + } /* Force reset of idle/hv excludes regardless of what the * user requested. From 673514aff5d797b0cdb3f09097ae0e253b5667c3 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Tue, 12 Sep 2017 17:22:42 +0200 Subject: [PATCH 029/279] s390/dasd: fix race during dasd initialization Fix a panic in blk_mq_hctx_has_pending() that is caused by a racy call to blk_mq_run_hw_queues in a dasd function that might get called with the request queue not yet initialized during initialization. Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index ea19b4ff87a2..29f35e29d480 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1644,7 +1644,9 @@ void dasd_generic_handle_state_change(struct dasd_device *device) dasd_schedule_device_bh(device); if (device->block) { dasd_schedule_block_bh(device->block); - blk_mq_run_hw_queues(device->block->request_queue, true); + if (device->block->request_queue) + blk_mq_run_hw_queues(device->block->request_queue, + true); } } EXPORT_SYMBOL_GPL(dasd_generic_handle_state_change); @@ -3759,7 +3761,9 @@ int dasd_generic_path_operational(struct dasd_device *device) dasd_schedule_device_bh(device); if (device->block) { dasd_schedule_block_bh(device->block); - blk_mq_run_hw_queues(device->block->request_queue, true); + if (device->block->request_queue) + blk_mq_run_hw_queues(device->block->request_queue, + true); } if (!device->stopped) @@ -4025,7 +4029,9 @@ int dasd_generic_restore_device(struct ccw_device *cdev) if (device->block) { dasd_schedule_block_bh(device->block); - blk_mq_run_hw_queues(device->block->request_queue, true); + if (device->block->request_queue) + blk_mq_run_hw_queues(device->block->request_queue, + true); } clear_bit(DASD_FLAG_SUSPENDED, &device->flags); From b468b6a4969f9bdddb31d484f151bfa03fbee767 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 7 Sep 2017 13:54:36 +0200 Subject: [PATCH 030/279] scsi: scsi_transport_fc: fix NULL pointer dereference in fc_bsg_job_timeout bsg-lib now embeddeds the job structure into the request, and req->special can't be used anymore. Signed-off-by: Christoph Hellwig Cc: stable@vger.kernel.org Reviewed-by: Ming Lei Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_fc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index 3c6bc0081fcb..ba9d70f8a6a1 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -3571,7 +3571,7 @@ fc_vport_sched_delete(struct work_struct *work) static enum blk_eh_timer_return fc_bsg_job_timeout(struct request *req) { - struct bsg_job *job = (void *) req->special; + struct bsg_job *job = blk_mq_rq_to_pdu(req); struct Scsi_Host *shost = fc_bsg_to_shost(job); struct fc_rport *rport = fc_bsg_to_rport(job); struct fc_internal *i = to_fc_internal(shost->transportt); From fd536998314a9dc54f384fa16287321edb81602a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 11 Sep 2017 22:00:57 +0200 Subject: [PATCH 031/279] scsi: acornscsi: fix build error A cleanup patch introduced a fatal typo from inbalanced curly braces: drivers/scsi/arm/acornscsi.c: In function 'acornscsi_host_reset': drivers/scsi/arm/acornscsi.c:2773:1: error: ISO C90 forbids mixed declarations and code [-Werror=declaration-after-statement] drivers/scsi/arm/acornscsi.c:2795:12: error: invalid storage class for function 'acornscsi_show_info' static int acornscsi_show_info(struct seq_file *m, struct Scsi_Host *instance) The same patch incorrectly changed the argument type of the reset handler, as shown by this warning: drivers/scsi/arm/acornscsi.c:2888:27: error: initialization of 'int (*)(struct scsi_cmnd *)' from incompatible pointer type 'int (*)(struct Scsi_Host *)' [-Werror=incompatible-pointer-types] .eh_host_reset_handler = acornscsi_host_reset, This removes one the extraneous opening brace and reverts the argument type change. [mkp: fixed checkpatch complaint] Fixes: 74fa80ee3fae ("scsi: acornscsi: move bus reset to host reset") Signed-off-by: Arnd Bergmann Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/arm/acornscsi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/arm/acornscsi.c b/drivers/scsi/arm/acornscsi.c index 690816f3c6af..421fe869a11e 100644 --- a/drivers/scsi/arm/acornscsi.c +++ b/drivers/scsi/arm/acornscsi.c @@ -2725,9 +2725,9 @@ int acornscsi_abort(struct scsi_cmnd *SCpnt) * Params : SCpnt - command causing reset * Returns : one of SCSI_RESET_ macros */ -int acornscsi_host_reset(struct Scsi_Host *shpnt) +int acornscsi_host_reset(struct scsi_cmnd *SCpnt) { - AS_Host *host = (AS_Host *)shpnt->hostdata; + AS_Host *host = (AS_Host *)SCpnt->device->host->hostdata; struct scsi_cmnd *SCptr; host->stats.resets += 1; @@ -2741,7 +2741,7 @@ int acornscsi_host_reset(struct Scsi_Host *shpnt) printk(KERN_WARNING "acornscsi_reset: "); print_sbic_status(asr, ssr, host->scsi.phase); - for (devidx = 0; devidx < 9; devidx ++) { + for (devidx = 0; devidx < 9; devidx++) acornscsi_dumplog(host, devidx); } #endif From e785fa0a164aa11001cba931367c7f94ffaff888 Mon Sep 17 00:00:00 2001 From: Vladis Dronov Date: Wed, 13 Sep 2017 00:21:21 +0200 Subject: [PATCH 032/279] nl80211: check for the required netlink attributes presence nl80211_set_rekey_data() does not check if the required attributes NL80211_REKEY_DATA_{REPLAY_CTR,KEK,KCK} are present when processing NL80211_CMD_SET_REKEY_OFFLOAD request. This request can be issued by users with CAP_NET_ADMIN privilege and may result in NULL dereference and a system crash. Add a check for the required attributes presence. This patch is based on the patch by bo Zhang. This fixes CVE-2017-12153. References: https://bugzilla.redhat.com/show_bug.cgi?id=1491046 Fixes: e5497d766ad ("cfg80211/nl80211: support GTK rekey offload") Cc: # v3.1-rc1 Reported-by: bo Zhang Signed-off-by: Vladis Dronov Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 0df8023f480b..fbd5593e88cb 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -10903,6 +10903,9 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info) if (err) return err; + if (!tb[NL80211_REKEY_DATA_REPLAY_CTR] || !tb[NL80211_REKEY_DATA_KEK] || + !tb[NL80211_REKEY_DATA_KCK]) + return -EINVAL; if (nla_len(tb[NL80211_REKEY_DATA_REPLAY_CTR]) != NL80211_REPLAY_CTR_LEN) return -ERANGE; if (nla_len(tb[NL80211_REKEY_DATA_KEK]) != NL80211_KEK_LEN) From f7f3dc00f61261cdc9ccd8b886f21bc4dffd6fd9 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 7 Sep 2017 19:08:21 +0200 Subject: [PATCH 033/279] x86/cpu/AMD: Fix erratum 1076 (CPB bit) CPUID Fn8000_0007_EDX[CPB] is wrongly 0 on models up to B1. But they do support CPB (AMD's Core Performance Boosting cpufreq CPU feature), so fix that. Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sherry Hurwitz Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170907170821.16021-1-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 9862e2cd6d93..d58184b7cd44 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -763,6 +763,16 @@ static void init_amd_bd(struct cpuinfo_x86 *c) } } +static void init_amd_zn(struct cpuinfo_x86 *c) +{ + /* + * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects + * all up to and including B1. + */ + if (c->x86_model <= 1 && c->x86_mask <= 1) + set_cpu_cap(c, X86_FEATURE_CPB); +} + static void init_amd(struct cpuinfo_x86 *c) { early_init_amd(c); @@ -791,6 +801,7 @@ static void init_amd(struct cpuinfo_x86 *c) case 0x10: init_amd_gh(c); break; case 0x12: init_amd_ln(c); break; case 0x15: init_amd_bd(c); break; + case 0x17: init_amd_zn(c); break; } /* Enable workaround for FXSAVE leak */ From 0998b7a0befdf6e734032895ee639a5e6f88cc3f Mon Sep 17 00:00:00 2001 From: Martin Kepplinger Date: Thu, 14 Sep 2017 08:01:38 +0200 Subject: [PATCH 034/279] objtool: Fix memory leak in elf_create_rela_section() Let's free the allocated char array 'relaname' before returning, in order to avoid leaking memory. Signed-off-by: Martin Kepplinger Acked-by: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: mingo.kernel.org@gmail.com Link: http://lkml.kernel.org/r/20170914060138.26472-1-martink@posteo.de Signed-off-by: Ingo Molnar --- tools/objtool/elf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 6e9f980a7d26..1e89a5f8bfc9 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -508,6 +508,7 @@ struct section *elf_create_rela_section(struct elf *elf, struct section *base) strcat(relaname, base->name); sec = elf_create_section(elf, relaname, sizeof(GElf_Rela), 0); + free(relaname); if (!sec) return NULL; From df968c9329f6e5cf3596a0a54adb6f749747a746 Mon Sep 17 00:00:00 2001 From: Petr Vandrovec Date: Fri, 15 Sep 2017 02:15:05 -0500 Subject: [PATCH 035/279] objtool: Do not retrieve data from empty sections Binutils 2.29-9 in Debian return an error when elf_getdata is invoked on empty section (.note.GNU-stack in all kernel files), causing immediate failure of kernel build with: elf_getdata: can't manipulate null section As nothing is done with sections that have zero size, just do not retrieve their data at all. Signed-off-by: Petr Vandrovec Signed-off-by: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/2ce30a44349065b70d0f00e71e286dc0cbe745e6.1505459652.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- tools/objtool/elf.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 1e89a5f8bfc9..b4cd8bc62521 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -175,19 +175,20 @@ static int read_sections(struct elf *elf) return -1; } - sec->data = elf_getdata(s, NULL); - if (!sec->data) { - WARN_ELF("elf_getdata"); - return -1; + if (sec->sh.sh_size != 0) { + sec->data = elf_getdata(s, NULL); + if (!sec->data) { + WARN_ELF("elf_getdata"); + return -1; + } + if (sec->data->d_off != 0 || + sec->data->d_size != sec->sh.sh_size) { + WARN("unexpected data attributes for %s", + sec->name); + return -1; + } } - - if (sec->data->d_off != 0 || - sec->data->d_size != sec->sh.sh_size) { - WARN("unexpected data attributes for %s", sec->name); - return -1; - } - - sec->len = sec->data->d_size; + sec->len = sec->sh.sh_size; } /* sanity check, one more call to elf_nextscn() should return NULL */ From 97dab2ae7e8473a821f72a039ead0f36b12ba22d Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 15 Sep 2017 02:17:11 -0500 Subject: [PATCH 036/279] objtool: Fix object file corruption Arnd Bergmann reported that a randconfig build was failing with the following link error: built-in.o: member arch/x86/kernel/time.o in archive is not an object It turns out the link failed because the time.o file had been corrupted by objtool: nm: arch/x86/kernel/time.o: File format not recognized In certain rare cases, when a .o file's ORC table is very small, the .data section size doesn't change because it's page aligned. Because all the existing sections haven't changed size, libelf doesn't detect any section header changes, and so it doesn't update the section header table properly. Instead it writes junk in the section header entries for the new ORC sections. Make sure libelf properly updates the section header table by setting the ELF_F_DIRTY flag in the top level elf struct. Reported-by: Arnd Bergmann Signed-off-by: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 627fce14809b ("objtool: Add ORC unwind table generation") Link: http://lkml.kernel.org/r/e650fd0f2d8a209d1409a9785deb101fdaed55fb.1505459813.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- tools/objtool/elf.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index b4cd8bc62521..24460155c82c 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -563,6 +563,7 @@ int elf_write(struct elf *elf) struct section *sec; Elf_Scn *s; + /* Update section headers for changed sections: */ list_for_each_entry(sec, &elf->sections, list) { if (sec->changed) { s = elf_getscn(elf->elf, sec->idx); @@ -570,13 +571,17 @@ int elf_write(struct elf *elf) WARN_ELF("elf_getscn"); return -1; } - if (!gelf_update_shdr (s, &sec->sh)) { + if (!gelf_update_shdr(s, &sec->sh)) { WARN_ELF("gelf_update_shdr"); return -1; } } } + /* Make sure the new section header entries get updated properly. */ + elf_flagelf(elf->elf, ELF_C_SET, ELF_F_DIRTY); + + /* Write all changes to the file. */ if (elf_update(elf->elf, ELF_C_WRITE) < 0) { WARN_ELF("elf_update"); return -1; From 9c95be0163a0a699ed7eda4727f485f8453580ef Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Wed, 13 Sep 2017 16:29:46 +0200 Subject: [PATCH 037/279] scsi: sd: Remove unnecessary condition in sd_read_block_limits() After series of changes around WRITE_SAME and UNMAP setup we ended up with leftover unnecessary condition. Remove it. Signed-off-by: Lukas Czerner Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 11c1738c2100..fb9f8b5f4673 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2915,8 +2915,6 @@ static void sd_read_block_limits(struct scsi_disk *sdkp) sd_config_discard(sdkp, SD_LBP_WS16); else if (sdkp->lbpws10) sd_config_discard(sdkp, SD_LBP_WS10); - else if (sdkp->lbpu && sdkp->max_unmap_blocks) - sd_config_discard(sdkp, SD_LBP_UNMAP); else sd_config_discard(sdkp, SD_LBP_DISABLE); } From 4759df905a474d245752c9dc94288e779b8734dd Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 15 Sep 2017 14:05:15 +0200 Subject: [PATCH 038/279] scsi: sg: factor out sg_fill_request_table() Factor out sg_fill_request_table() for better readability. [mkp: typos, applied by hand] Signed-off-by: Hannes Reinecke Reviewed-by: Bart Van Assche Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/sg.c | 61 +++++++++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 26 deletions(-) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index cf0e71db9e51..1acdb6e03999 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -828,6 +828,40 @@ static int max_sectors_bytes(struct request_queue *q) return max_sectors << 9; } +static void +sg_fill_request_table(Sg_fd *sfp, sg_req_info_t *rinfo) +{ + Sg_request *srp; + int val; + unsigned int ms; + + val = 0; + list_for_each_entry(srp, &sfp->rq_list, entry) { + if (val > SG_MAX_QUEUE) + break; + memset(&rinfo[val], 0, SZ_SG_REQ_INFO); + rinfo[val].req_state = srp->done + 1; + rinfo[val].problem = + srp->header.masked_status & + srp->header.host_status & + srp->header.driver_status; + if (srp->done) + rinfo[val].duration = + srp->header.duration; + else { + ms = jiffies_to_msecs(jiffies); + rinfo[val].duration = + (ms > srp->header.duration) ? + (ms - srp->header.duration) : 0; + } + rinfo[val].orphan = srp->orphan; + rinfo[val].sg_io_owned = srp->sg_io_owned; + rinfo[val].pack_id = srp->header.pack_id; + rinfo[val].usr_ptr = srp->header.usr_ptr; + val++; + } +} + static long sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg) { @@ -1012,38 +1046,13 @@ sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg) return -EFAULT; else { sg_req_info_t *rinfo; - unsigned int ms; rinfo = kmalloc(SZ_SG_REQ_INFO * SG_MAX_QUEUE, GFP_KERNEL); if (!rinfo) return -ENOMEM; read_lock_irqsave(&sfp->rq_list_lock, iflags); - val = 0; - list_for_each_entry(srp, &sfp->rq_list, entry) { - if (val >= SG_MAX_QUEUE) - break; - memset(&rinfo[val], 0, SZ_SG_REQ_INFO); - rinfo[val].req_state = srp->done + 1; - rinfo[val].problem = - srp->header.masked_status & - srp->header.host_status & - srp->header.driver_status; - if (srp->done) - rinfo[val].duration = - srp->header.duration; - else { - ms = jiffies_to_msecs(jiffies); - rinfo[val].duration = - (ms > srp->header.duration) ? - (ms - srp->header.duration) : 0; - } - rinfo[val].orphan = srp->orphan; - rinfo[val].sg_io_owned = srp->sg_io_owned; - rinfo[val].pack_id = srp->header.pack_id; - rinfo[val].usr_ptr = srp->header.usr_ptr; - val++; - } + sg_fill_request_table(sfp, rinfo); read_unlock_irqrestore(&sfp->rq_list_lock, iflags); result = __copy_to_user(p, rinfo, SZ_SG_REQ_INFO * SG_MAX_QUEUE); From 3e0097499839e0fe3af380410eababe5a47c4cf9 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 15 Sep 2017 14:05:16 +0200 Subject: [PATCH 039/279] scsi: sg: fixup infoleak when using SG_GET_REQUEST_TABLE When calling SG_GET_REQUEST_TABLE ioctl only a half-filled table is returned; the remaining part will then contain stale kernel memory information. This patch zeroes out the entire table to avoid this issue. Signed-off-by: Hannes Reinecke Reviewed-by: Bart Van Assche Reviewed-by: Christoph Hellwig Reviewed-by: Eric Dumazet Signed-off-by: Martin K. Petersen --- drivers/scsi/sg.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 1acdb6e03999..0419c2298eab 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -839,7 +839,6 @@ sg_fill_request_table(Sg_fd *sfp, sg_req_info_t *rinfo) list_for_each_entry(srp, &sfp->rq_list, entry) { if (val > SG_MAX_QUEUE) break; - memset(&rinfo[val], 0, SZ_SG_REQ_INFO); rinfo[val].req_state = srp->done + 1; rinfo[val].problem = srp->header.masked_status & @@ -1047,8 +1046,8 @@ sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg) else { sg_req_info_t *rinfo; - rinfo = kmalloc(SZ_SG_REQ_INFO * SG_MAX_QUEUE, - GFP_KERNEL); + rinfo = kzalloc(SZ_SG_REQ_INFO * SG_MAX_QUEUE, + GFP_KERNEL); if (!rinfo) return -ENOMEM; read_lock_irqsave(&sfp->rq_list_lock, iflags); From 51ae253870f55e14ba2854ce9577ac2920efef0c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 15 Sep 2017 21:29:13 +0200 Subject: [PATCH 040/279] xen: x86: mark xen_find_pt_base as __init gcc-4.6 causes a harmless link-time warning: WARNING: vmlinux.o(.text.unlikely+0x48e): Section mismatch in reference from the function xen_find_pt_base() to the function .init.text:m2p() The function xen_find_pt_base() references the function __init m2p(). This is often because xen_find_pt_base lacks a __init annotation or the annotation of m2p is wrong. Newer compilers inline this function, so it never shows up, but marking it __init is the right way to avoid the warning. Fixes: 70e61199559a ("xen: move p2m list if conflicting with e820 map") Signed-off-by: Arnd Bergmann Signed-off-by: Boris Ostrovsky --- arch/x86/xen/mmu_pv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 0422ee7e70b3..ddfeebc2b125 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -2221,7 +2221,7 @@ static void __init xen_write_cr3_init(unsigned long cr3) * not the first page table in the page table pool. * Iterate through the initial page tables to find the real page table base. */ -static phys_addr_t xen_find_pt_base(pmd_t *pmd) +static phys_addr_t __init xen_find_pt_base(pmd_t *pmd) { phys_addr_t pt_base, paddr; unsigned pmdidx; From fd7d56270b526ca3ed0c224362e3c64a0f86687a Mon Sep 17 00:00:00 2001 From: John Ogness Date: Thu, 14 Sep 2017 11:42:17 +0200 Subject: [PATCH 041/279] fs/proc: Report eip/esp in /prod/PID/stat for coredumping Commit 0a1eb2d474ed ("fs/proc: Stop reporting eip and esp in /proc/PID/stat") stopped reporting eip/esp because it is racy and dangerous for executing tasks. The comment adds: As far as I know, there are no use programs that make any material use of these fields, so just get rid of them. However, existing userspace core-dump-handler applications (for example, minicoredumper) are using these fields since they provide an excellent cross-platform interface to these valuable pointers. So that commit introduced a user space visible regression. Partially revert the change and make the readout possible for tasks with the proper permissions and only if the target task has the PF_DUMPCORE flag set. Fixes: 0a1eb2d474ed ("fs/proc: Stop reporting eip and esp in> /proc/PID/stat") Reported-by: Marco Felsch Signed-off-by: John Ogness Reviewed-by: Andy Lutomirski Cc: Tycho Andersen Cc: Kees Cook Cc: Peter Zijlstra Cc: Brian Gerst Cc: stable@vger.kernel.org Cc: Tetsuo Handa Cc: Borislav Petkov Cc: Al Viro Cc: Linux API Cc: Andrew Morton Cc: Linus Torvalds Link: http://lkml.kernel.org/r/87poatfwg6.fsf@linutronix.de Signed-off-by: Thomas Gleixner --- fs/proc/array.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/proc/array.c b/fs/proc/array.c index 88c355574aa0..525157ca25cb 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include #include @@ -421,7 +422,15 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, * esp and eip are intentionally zeroed out. There is no * non-racy way to read them without freezing the task. * Programs that need reliable values can use ptrace(2). + * + * The only exception is if the task is core dumping because + * a program is not able to use ptrace(2) in that case. It is + * safe because the task has stopped executing permanently. */ + if (permitted && (task->flags & PF_DUMPCORE)) { + eip = KSTK_EIP(task); + esp = KSTK_ESP(task); + } } get_task_comm(tcomm, task); From 9cb067ef8a10bb13112e4d1c0ea996ec96527422 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:03 +0200 Subject: [PATCH 042/279] genirq: Fix cpumask check in __irq_startup_managed() The result of cpumask_any_and() is invalid when result greater or equal nr_cpu_ids. The current check is checking for greater only. Fix it. Fixes: 761ea388e8c4 ("genirq: Handle managed irqs gracefully in irq_startup()") Signed-off-by: Thomas Gleixner Cc: Boris Ostrovsky Cc: Juergen Gross Cc: Tony Luck Cc: Chen Yu Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: stable@vger.kernel.org Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: http://lkml.kernel.org/r/20170913213152.272283444@linutronix.de --- kernel/irq/chip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index f51b7b6d2451..6fc89fd93824 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -202,7 +202,7 @@ __irq_startup_managed(struct irq_desc *desc, struct cpumask *aff, bool force) irqd_clr_managed_shutdown(d); - if (cpumask_any_and(aff, cpu_online_mask) > nr_cpu_ids) { + if (cpumask_any_and(aff, cpu_online_mask) >= nr_cpu_ids) { /* * Catch code which fiddles with enable_irq() on a managed * and potentially shutdown IRQ. Chained interrupt From 6d57339890c9a9dfcd4ba4f8931b911b962968e3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 15 Sep 2017 17:08:16 +0200 Subject: [PATCH 043/279] dma-coherent: fix rmem_dma_device_init regression My recent bug fix introduced another bug, which caused rmem_dma_device_init to always fail, as rmem->priv is never set to anything. This restores the previous behavior, calling dma_init_coherent_memory() whenever ->priv is NULL. Fixes: d35b0996fef3 ("dma-coherent: fix dma_declare_coherent_memory() logic error") Reported-by: Roy Pledge Tested-by: Roy Pledge Signed-off-by: Arnd Bergmann Signed-off-by: Christoph Hellwig --- drivers/base/dma-coherent.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/base/dma-coherent.c b/drivers/base/dma-coherent.c index a39b2166b145..744f64f43454 100644 --- a/drivers/base/dma-coherent.c +++ b/drivers/base/dma-coherent.c @@ -348,16 +348,15 @@ static int rmem_dma_device_init(struct reserved_mem *rmem, struct device *dev) struct dma_coherent_mem *mem = rmem->priv; int ret; - if (!mem) - return -ENODEV; - - ret = dma_init_coherent_memory(rmem->base, rmem->base, rmem->size, - DMA_MEMORY_EXCLUSIVE, &mem); - - if (ret) { - pr_err("Reserved memory: failed to init DMA memory pool at %pa, size %ld MiB\n", - &rmem->base, (unsigned long)rmem->size / SZ_1M); - return ret; + if (!mem) { + ret = dma_init_coherent_memory(rmem->base, rmem->base, + rmem->size, + DMA_MEMORY_EXCLUSIVE, &mem); + if (ret) { + pr_err("Reserved memory: failed to init DMA memory pool at %pa, size %ld MiB\n", + &rmem->base, (unsigned long)rmem->size / SZ_1M); + return ret; + } } mem->use_dev_dma_pfn_offset = true; rmem->priv = mem; From ec11653b531099ddc08a8c7eb495ab83cae84e19 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 14 Sep 2017 14:51:20 -0500 Subject: [PATCH 044/279] CIFS/SMB3: Update documentation to reflect SMB3 and various changes Signed-off-by: Steve French Reviewed-by: Aurelien Aptel Reviewed-by: Pavel Shilovsky --- Documentation/filesystems/cifs/AUTHORS | 5 ++ Documentation/filesystems/cifs/README | 81 ++++++++++++------------- Documentation/filesystems/cifs/TODO | 72 +++++++++++----------- Documentation/filesystems/cifs/cifs.txt | 24 +++++--- 4 files changed, 91 insertions(+), 91 deletions(-) diff --git a/Documentation/filesystems/cifs/AUTHORS b/Documentation/filesystems/cifs/AUTHORS index c98800df677f..9f4f87e16240 100644 --- a/Documentation/filesystems/cifs/AUTHORS +++ b/Documentation/filesystems/cifs/AUTHORS @@ -41,6 +41,11 @@ Igor Mammedov (DFS support) Jeff Layton (many, many fixes, as well as great work on the cifs Kerberos code) Scott Lovenberg Pavel Shilovsky (for great work adding SMB2 support, and various SMB3 features) +Aurelien Aptel (for DFS SMB3 work and some key bug fixes) +Ronnie Sahlberg (for SMB3 xattr work and bug fixes) +Shirish Pargaonkar (for many ACL patches over the years) +Sachin Prabhu (many bug fixes, including for reconnect, copy offload and security) + Test case and Bug Report contributors ------------------------------------- diff --git a/Documentation/filesystems/cifs/README b/Documentation/filesystems/cifs/README index a54788405429..a9da51553ba3 100644 --- a/Documentation/filesystems/cifs/README +++ b/Documentation/filesystems/cifs/README @@ -1,10 +1,14 @@ -The CIFS VFS support for Linux supports many advanced network filesystem -features such as hierarchical dfs like namespace, hardlinks, locking and more. +This module supports the SMB3 family of advanced network protocols (as well +as older dialects, originally called "CIFS" or SMB1). + +The CIFS VFS module for Linux supports many advanced network filesystem +features such as hierarchical DFS like namespace, hardlinks, locking and more. It was designed to comply with the SNIA CIFS Technical Reference (which supersedes the 1992 X/Open SMB Standard) as well as to perform best practice practical interoperability with Windows 2000, Windows XP, Samba and equivalent servers. This code was developed in participation with the Protocol Freedom -Information Foundation. +Information Foundation. CIFS and now SMB3 has now become a defacto +standard for interoperating between Macs and Windows and major NAS appliances. Please see http://protocolfreedom.org/ and @@ -15,30 +19,11 @@ for more details. For questions or bug reports please contact: sfrench@samba.org (sfrench@us.ibm.com) +See the project page at: https://wiki.samba.org/index.php/LinuxCIFS_utils + Build instructions: ================== -For Linux 2.4: -1) Get the kernel source (e.g.from http://www.kernel.org) -and download the cifs vfs source (see the project page -at http://us1.samba.org/samba/Linux_CIFS_client.html) -and change directory into the top of the kernel directory -then patch the kernel (e.g. "patch -p1 < cifs_24.patch") -to add the cifs vfs to your kernel configure options if -it has not already been added (e.g. current SuSE and UL -users do not need to apply the cifs_24.patch since the cifs vfs is -already in the kernel configure menu) and then -mkdir linux/fs/cifs and then copy the current cifs vfs files from -the cifs download to your kernel build directory e.g. - - cp /fs/cifs/* to /fs/cifs - -2) make menuconfig (or make xconfig) -3) select cifs from within the network filesystem choices -4) save and exit -5) make dep -6) make modules (or "make" if CIFS VFS not to be built as a module) - -For Linux 2.6: +For Linux: 1) Download the kernel (e.g. from http://www.kernel.org) and change directory into the top of the kernel directory tree (e.g. /usr/src/linux-2.5.73) @@ -61,16 +46,13 @@ would simply type "make install"). If you do not have the utility mount.cifs (in the Samba 3.0 source tree and on the CIFS VFS web site) copy it to the same directory in which mount.smbfs and similar files reside (usually /sbin). Although the helper software is not -required, mount.cifs is recommended. Eventually the Samba 3.0 utility program -"net" may also be helpful since it may someday provide easier mount syntax for -users who are used to Windows e.g. - net use +required, mount.cifs is recommended. Most distros include a "cifs-utils" +package that includes this utility so it is recommended to install this. + Note that running the Winbind pam/nss module (logon service) on all of your Linux clients is useful in mapping Uids and Gids consistently across the domain to the proper network user. The mount.cifs mount helper can be -trivially built from Samba 3.0 or later source e.g. by executing: - - gcc samba/source/client/mount.cifs.c -o mount.cifs +found at cifs-utils.git on git.samba.org If cifs is built as a module, then the size and number of network buffers and maximum number of simultaneous requests to one server can be configured. @@ -79,6 +61,18 @@ Changing these from their defaults is not recommended. By executing modinfo on kernel/fs/cifs/cifs.ko the list of configuration changes that can be made at module initialization time (by running insmod cifs.ko) can be seen. +Recommendations +=============== +To improve security the SMB2.1 dialect or later (usually will get SMB3) is now +the new default. To use old dialects (e.g. to mount Windows XP) use "vers=1.0" +on mount (or vers=2.0 for Windows Vista). Note that the CIFS (vers=1.0) is +much older and less secure than the default dialect SMB3 which includes +many advanced security features such as downgrade attack detection +and encrypted shares and stronger signing and authentication algorithms. +There are additional mount options that may be helpful for SMB3 to get +improved POSIX behavior (NB: can use vers=3.0 to force only SMB3, never 2.1): + "mfsymlinks" and "cifsacl" and "idsfromsid" + Allowing User Mounts ==================== To permit users to mount and unmount over directories they own is possible @@ -98,9 +92,7 @@ and execution of suid programs on the remote target would be enabled by default. This can be changed, as with nfs and other filesystems, by simply specifying "nosuid" among the mount options. For user mounts though to be able to pass the suid flag to mount requires rebuilding -mount.cifs with the following flag: - - gcc samba/source/client/mount.cifs.c -DCIFS_ALLOW_USR_SUID -o mount.cifs +mount.cifs with the following flag: CIFS_ALLOW_USR_SUID There is a corresponding manual page for cifs mounting in the Samba 3.0 and later source tree in docs/manpages/mount.cifs.8 @@ -189,18 +181,18 @@ applications running on the same server as Samba. Use instructions: ================ Once the CIFS VFS support is built into the kernel or installed as a module -(cifs.o), you can use mount syntax like the following to access Samba or Windows -servers: +(cifs.ko), you can use mount syntax like the following to access Samba or +Mac or Windows servers: - mount -t cifs //9.53.216.11/e$ /mnt -o user=myname,pass=mypassword + mount -t cifs //9.53.216.11/e$ /mnt -o username=myname,password=mypassword Before -o the option -v may be specified to make the mount.cifs mount helper display the mount steps more verbosely. After -o the following commonly used cifs vfs specific options are supported: - user= - pass= + username= + password= domain= Other cifs mount options are described below. Use of TCP names (in addition to @@ -246,13 +238,16 @@ the Server's registry. Samba starting with version 3.10 will allow such filenames (ie those which contain valid Linux characters, which normally would be forbidden for Windows/CIFS semantics) as long as the server is configured for Unix Extensions (and the client has not disabled -/proc/fs/cifs/LinuxExtensionsEnabled). - +/proc/fs/cifs/LinuxExtensionsEnabled). In addition the mount option +"mapposix" can be used on CIFS (vers=1.0) to force the mapping of +illegal Windows/NTFS/SMB characters to a remap range (this mount parm +is the default for SMB3). This remap ("mapposix") range is also +compatible with Mac (and "Services for Mac" on some older Windows). CIFS VFS Mount Options ====================== A partial list of the supported mount options follows: - user The user name to use when trying to establish + username The user name to use when trying to establish the CIFS session. password The user password. If the mount helper is installed, the user will be prompted for password diff --git a/Documentation/filesystems/cifs/TODO b/Documentation/filesystems/cifs/TODO index 066ffddc3964..396ecfd6ff4a 100644 --- a/Documentation/filesystems/cifs/TODO +++ b/Documentation/filesystems/cifs/TODO @@ -1,4 +1,4 @@ -Version 2.03 August 1, 2014 +Version 2.04 September 13, 2017 A Partial List of Missing Features ================================== @@ -8,73 +8,69 @@ for visible, important contributions to this module. Here is a partial list of the known problems and missing features: a) SMB3 (and SMB3.02) missing optional features: - - RDMA + - RDMA (started) - multichannel (started) - directory leases (improved metadata caching) - T10 copy offload (copy chunk is only mechanism supported) - - encrypted shares b) improved sparse file support c) Directory entry caching relies on a 1 second timer, rather than -using FindNotify or equivalent. - (started) +using Directory Leases d) quota support (needs minor kernel change since quota calls to make it to network filesystems or deviceless filesystems) -e) improve support for very old servers (OS/2 and Win9x for example) -Including support for changing the time remotely (utimes command). +e) Better optimize open to reduce redundant opens (using reference +counts more) and to improve use of compounding in SMB3 to reduce +number of roundtrips. -f) hook lower into the sockets api (as NFS/SunRPC does) to avoid the -extra copy in/out of the socket buffers in some cases. - -g) Better optimize open (and pathbased setfilesize) to reduce the -oplock breaks coming from windows srv. Piggyback identical file -opens on top of each other by incrementing reference count rather -than resending (helps reduce server resource utilization and avoid -spurious oplock breaks). - -h) Add support for storing symlink info to Windows servers -in the Extended Attribute format their SFU clients would recognize. - -i) Finish inotify support so kde and gnome file list windows +f) Finish inotify support so kde and gnome file list windows will autorefresh (partially complete by Asser). Needs minor kernel vfs change to support removing D_NOTIFY on a file. -j) Add GUI tool to configure /proc/fs/cifs settings and for display of +g) Add GUI tool to configure /proc/fs/cifs settings and for display of the CIFS statistics (started) -k) implement support for security and trusted categories of xattrs +h) implement support for security and trusted categories of xattrs (requires minor protocol extension) to enable better support for SELINUX -l) Implement O_DIRECT flag on open (already supported on mount) +i) Implement O_DIRECT flag on open (already supported on mount) -m) Create UID mapping facility so server UIDs can be mapped on a per +j) Create UID mapping facility so server UIDs can be mapped on a per mount or a per server basis to client UIDs or nobody if no mapping -exists. This is helpful when Unix extensions are negotiated to -allow better permission checking when UIDs differ on the server -and client. Add new protocol request to the CIFS protocol -standard for asking the server for the corresponding name of a -particular uid. +exists. Also better integration with winbind for resolving SID owners -n) DOS attrs - returned as pseudo-xattr in Samba format (check VFAT and NTFS for this too) +k) Add tools to take advantage of more smb3 specific ioctls and features -o) mount check for unmatched uids +l) encrypted file support -p) Add support for new vfs entry point for fallocate +m) improved stats gathering, tools (perhaps integration with nfsometer?) -q) Add tools to take advantage of cifs/smb3 specific ioctls and features -such as "CopyChunk" (fast server side file copy) +n) allow setting more NTFS/SMB3 file attributes remotely (currently limited to compressed +file attribute via chflags) and improve user space tools for managing and +viewing them. -r) encrypted file support +o) mount helper GUI (to simplify the various configuration options on mount) -s) improved stats gathering, tools (perhaps integration with nfsometer?) +p) autonegotiation of dialects (offering more than one dialect ie SMB3.02, +SMB3, SMB2.1 not just SMB3). -t) allow setting more NTFS/SMB3 file attributes remotely (currently limited to compressed -file attribute via chflags) +q) Allow mount.cifs to be more verbose in reporting errors with dialect +or unsupported feature errors. -u) mount helper GUI (to simplify the various configuration options on mount) +r) updating cifs documentation, and user guid. +s) Addressing bugs found by running a broader set of xfstests in standard +file system xfstest suite. + +t) split cifs and smb3 support into separate modules so legacy (and less +secure) CIFS dialect can be disabled in environments that don't need it +and simplify the code. + +u) Finish up SMB3.1.1 dialect support + +v) POSIX Extensions for SMB3.1.1 KNOWN BUGS ==================================== diff --git a/Documentation/filesystems/cifs/cifs.txt b/Documentation/filesystems/cifs/cifs.txt index 2fac91ac96cf..67756607246e 100644 --- a/Documentation/filesystems/cifs/cifs.txt +++ b/Documentation/filesystems/cifs/cifs.txt @@ -1,24 +1,28 @@ - This is the client VFS module for the Common Internet File System - (CIFS) protocol which is the successor to the Server Message Block + This is the client VFS module for the SMB3 NAS protocol as well + older dialects such as the Common Internet File System (CIFS) + protocol which was the successor to the Server Message Block (SMB) protocol, the native file sharing mechanism for most early PC operating systems. New and improved versions of CIFS are now called SMB2 and SMB3. These dialects are also supported by the CIFS VFS module. CIFS is fully supported by network - file servers such as Windows 2000, 2003, 2008 and 2012 + file servers such as Windows 2000, 2003, 2008, 2012 and 2016 as well by Samba (which provides excellent CIFS - server support for Linux and many other operating systems), so + server support for Linux and many other operating systems), Apple + systems, as well as most Network Attached Storage vendors, so this network filesystem client can mount to a wide variety of servers. The intent of this module is to provide the most advanced network - file system function for CIFS compliant servers, including better - POSIX compliance, secure per-user session establishment, high - performance safe distributed caching (oplock), optional packet + file system function for SMB3 compliant servers, including advanced + security features, excellent parallelized high performance i/o, better + POSIX compliance, secure per-user session establishment, encryption, + high performance safe distributed caching (leases/oplocks), optional packet signing, large files, Unicode support and other internationalization improvements. Since both Samba server and this filesystem client support - the CIFS Unix extensions, the combination can provide a reasonable - alternative to NFSv4 for fileserving in some Linux to Linux environments, - not just in Linux to Windows environments. + the CIFS Unix extensions (and in the future SMB3 POSIX extensions), + the combination can provide a reasonable alternative to other network and + cluster file systems for fileserving in some Linux to Linux environments, + not just in Linux to Windows (or Linux to Mac) environments. This filesystem has an mount utility (mount.cifs) that can be obtained from From 9764c02fcbad40001fd3f63558d918e4d519bb75 Mon Sep 17 00:00:00 2001 From: Steve French Date: Sun, 17 Sep 2017 10:41:35 -0500 Subject: [PATCH 045/279] SMB3: Add support for multidialect negotiate (SMB2.1 and later) With the need to discourage use of less secure dialect, SMB1 (CIFS), we temporarily upgraded the dialect to SMB3 in 4.13, but since there are various servers which only support SMB2.1 (2.1 is more secure than CIFS/SMB1) but not optimal for a default dialect - add support for multidialect negotiation. cifs.ko will now request SMB2.1 or later (ie SMB2.1 or SMB3.0, SMB3.02) and the server will pick the latest most secure one it can support. In addition since we are sending multidialect negotiate, add support for secure negotiate to validate that a man in the middle didn't downgrade us. Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky CC: Stable # 4.13+ --- fs/cifs/cifsglob.h | 6 ++++ fs/cifs/connect.c | 24 +++++++++---- fs/cifs/smb2ops.c | 40 ++++++++++++++++++++++ fs/cifs/smb2pdu.c | 85 ++++++++++++++++++++++++++++++++++++++++------ fs/cifs/smb2pdu.h | 2 +- 5 files changed, 139 insertions(+), 18 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 808486c29f0d..de5b2e1fcce5 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -188,6 +188,8 @@ enum smb_version { #ifdef CONFIG_CIFS_SMB311 Smb_311, #endif /* SMB311 */ + Smb_3any, + Smb_default, Smb_version_err }; @@ -1701,6 +1703,10 @@ extern struct smb_version_values smb20_values; #define SMB21_VERSION_STRING "2.1" extern struct smb_version_operations smb21_operations; extern struct smb_version_values smb21_values; +#define SMBDEFAULT_VERSION_STRING "default" +extern struct smb_version_values smbdefault_values; +#define SMB3ANY_VERSION_STRING "3" +extern struct smb_version_values smb3any_values; #define SMB30_VERSION_STRING "3.0" extern struct smb_version_operations smb30_operations; extern struct smb_version_values smb30_values; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 5aa2d278ca84..8d38b22afb2b 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -301,6 +301,8 @@ static const match_table_t cifs_smb_version_tokens = { { Smb_311, SMB311_VERSION_STRING }, { Smb_311, ALT_SMB311_VERSION_STRING }, #endif /* SMB311 */ + { Smb_3any, SMB3ANY_VERSION_STRING }, + { Smb_default, SMBDEFAULT_VERSION_STRING }, { Smb_version_err, NULL } }; @@ -1148,6 +1150,14 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol) vol->vals = &smb311_values; break; #endif /* SMB311 */ + case Smb_3any: + vol->ops = &smb30_operations; /* currently identical with 3.0 */ + vol->vals = &smb3any_values; + break; + case Smb_default: + vol->ops = &smb30_operations; /* currently identical with 3.0 */ + vol->vals = &smbdefault_values; + break; default: cifs_dbg(VFS, "Unknown vers= option specified: %s\n", value); return 1; @@ -1274,9 +1284,9 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, vol->actimeo = CIFS_DEF_ACTIMEO; - /* FIXME: add autonegotiation for SMB3 or later rather than just SMB3 */ - vol->ops = &smb30_operations; /* both secure and accepted widely */ - vol->vals = &smb30_values; + /* offer SMB2.1 and later (SMB3 etc). Secure and widely accepted */ + vol->ops = &smb30_operations; + vol->vals = &smbdefault_values; vol->echo_interval = SMB_ECHO_INTERVAL_DEFAULT; @@ -1988,11 +1998,10 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, if (got_version == false) pr_warn("No dialect specified on mount. Default has changed to " - "a more secure dialect, SMB3 (vers=3.0), from CIFS " + "a more secure dialect, SMB2.1 or later (e.g. SMB3), from CIFS " "(SMB1). To use the less secure SMB1 dialect to access " - "old servers which do not support SMB3 specify vers=1.0" - " on mount. For somewhat newer servers such as Windows " - "7 try vers=2.1.\n"); + "old servers which do not support SMB3 (or SMB2.1) specify vers=1.0" + " on mount.\n"); kfree(mountdata_copy); return 0; @@ -2133,6 +2142,7 @@ static int match_server(struct TCP_Server_Info *server, struct smb_vol *vol) if (vol->nosharesock) return 0; + /* BB update this for smb3any and default case */ if ((server->vals != vol->vals) || (server->ops != vol->ops)) return 0; diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index fb2934b9b97c..405a9bf13aac 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -3110,6 +3110,46 @@ struct smb_version_values smb21_values = { .create_lease_size = sizeof(struct create_lease), }; +struct smb_version_values smb3any_values = { + .version_string = SMB3ANY_VERSION_STRING, + .protocol_id = SMB302_PROT_ID, /* doesn't matter, send protocol array */ + .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION, + .large_lock_type = 0, + .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, + .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, + .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK, + .header_size = sizeof(struct smb2_hdr), + .max_header_size = MAX_SMB2_HDR_SIZE, + .read_rsp_size = sizeof(struct smb2_read_rsp) - 1, + .lock_cmd = SMB2_LOCK, + .cap_unix = 0, + .cap_nt_find = SMB2_NT_FIND, + .cap_large_files = SMB2_LARGE_FILES, + .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED, + .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED, + .create_lease_size = sizeof(struct create_lease_v2), +}; + +struct smb_version_values smbdefault_values = { + .version_string = SMBDEFAULT_VERSION_STRING, + .protocol_id = SMB302_PROT_ID, /* doesn't matter, send protocol array */ + .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION, + .large_lock_type = 0, + .exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK, + .shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK, + .unlock_lock_type = SMB2_LOCKFLAG_UNLOCK, + .header_size = sizeof(struct smb2_hdr), + .max_header_size = MAX_SMB2_HDR_SIZE, + .read_rsp_size = sizeof(struct smb2_read_rsp) - 1, + .lock_cmd = SMB2_LOCK, + .cap_unix = 0, + .cap_nt_find = SMB2_NT_FIND, + .cap_large_files = SMB2_LARGE_FILES, + .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED, + .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED, + .create_lease_size = sizeof(struct create_lease_v2), +}; + struct smb_version_values smb30_values = { .version_string = SMB30_VERSION_STRING, .protocol_id = SMB30_PROT_ID, diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 69a751b038ab..5c16591a128e 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -491,10 +491,25 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) req->hdr.sync_hdr.SessionId = 0; - req->Dialects[0] = cpu_to_le16(ses->server->vals->protocol_id); - - req->DialectCount = cpu_to_le16(1); /* One vers= at a time for now */ - inc_rfc1001_len(req, 2); + if (strcmp(ses->server->vals->version_string, + SMB3ANY_VERSION_STRING) == 0) { + req->Dialects[0] = cpu_to_le16(SMB30_PROT_ID); + req->Dialects[1] = cpu_to_le16(SMB302_PROT_ID); + req->DialectCount = cpu_to_le16(2); + inc_rfc1001_len(req, 4); + } else if (strcmp(ses->server->vals->version_string, + SMBDEFAULT_VERSION_STRING) == 0) { + req->Dialects[0] = cpu_to_le16(SMB21_PROT_ID); + req->Dialects[1] = cpu_to_le16(SMB30_PROT_ID); + req->Dialects[2] = cpu_to_le16(SMB302_PROT_ID); + req->DialectCount = cpu_to_le16(3); + inc_rfc1001_len(req, 6); + } else { + /* otherwise send specific dialect */ + req->Dialects[0] = cpu_to_le16(ses->server->vals->protocol_id); + req->DialectCount = cpu_to_le16(1); + inc_rfc1001_len(req, 2); + } /* only one of SMB2 signing flags may be set in SMB2 request */ if (ses->sign) @@ -528,16 +543,42 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) */ if (rc == -EOPNOTSUPP) { cifs_dbg(VFS, "Dialect not supported by server. Consider " - "specifying vers=1.0 or vers=2.1 on mount for accessing" + "specifying vers=1.0 or vers=2.0 on mount for accessing" " older servers\n"); goto neg_exit; } else if (rc != 0) goto neg_exit; + if (strcmp(ses->server->vals->version_string, + SMB3ANY_VERSION_STRING) == 0) { + if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID)) { + cifs_dbg(VFS, + "SMB2 dialect returned but not requested\n"); + return -EIO; + } else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) { + cifs_dbg(VFS, + "SMB2.1 dialect returned but not requested\n"); + return -EIO; + } + } else if (strcmp(ses->server->vals->version_string, + SMBDEFAULT_VERSION_STRING) == 0) { + if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID)) { + cifs_dbg(VFS, + "SMB2 dialect returned but not requested\n"); + return -EIO; + } else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) { + /* ops set to 3.0 by default for default so update */ + ses->server->ops = &smb21_operations; + } + } else if (rsp->DialectRevision != ses->server->vals->protocol_id) { + /* if requested single dialect ensure returned dialect matched */ + cifs_dbg(VFS, "Illegal 0x%x dialect returned: not requested\n", + cpu_to_le16(rsp->DialectRevision)); + return -EIO; + } + cifs_dbg(FYI, "mode 0x%x\n", rsp->SecurityMode); - /* BB we may eventually want to match the negotiated vs. requested - dialect, even though we are only requesting one at a time */ if (rsp->DialectRevision == cpu_to_le16(SMB20_PROT_ID)) cifs_dbg(FYI, "negotiated smb2.0 dialect\n"); else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) @@ -558,6 +599,8 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) } server->dialect = le16_to_cpu(rsp->DialectRevision); + /* BB: add check that dialect was valid given dialect(s) we asked for */ + /* SMB2 only has an extended negflavor */ server->negflavor = CIFS_NEGFLAVOR_EXTENDED; /* set it to the maximum buffer size value we can send with 1 credit */ @@ -606,6 +649,7 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) struct validate_negotiate_info_req vneg_inbuf; struct validate_negotiate_info_rsp *pneg_rsp; u32 rsplen; + u32 inbuflen; /* max of 4 dialects */ cifs_dbg(FYI, "validate negotiate\n"); @@ -634,9 +678,30 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) else vneg_inbuf.SecurityMode = 0; - vneg_inbuf.DialectCount = cpu_to_le16(1); - vneg_inbuf.Dialects[0] = - cpu_to_le16(tcon->ses->server->vals->protocol_id); + + if (strcmp(tcon->ses->server->vals->version_string, + SMB3ANY_VERSION_STRING) == 0) { + vneg_inbuf.Dialects[0] = cpu_to_le16(SMB30_PROT_ID); + vneg_inbuf.Dialects[1] = cpu_to_le16(SMB302_PROT_ID); + vneg_inbuf.DialectCount = cpu_to_le16(2); + /* structure is big enough for 3 dialects, sending only 2 */ + inbuflen = sizeof(struct validate_negotiate_info_req) - 2; + } else if (strcmp(tcon->ses->server->vals->version_string, + SMBDEFAULT_VERSION_STRING) == 0) { + vneg_inbuf.Dialects[0] = cpu_to_le16(SMB21_PROT_ID); + vneg_inbuf.Dialects[1] = cpu_to_le16(SMB30_PROT_ID); + vneg_inbuf.Dialects[2] = cpu_to_le16(SMB302_PROT_ID); + vneg_inbuf.DialectCount = cpu_to_le16(3); + /* structure is big enough for 3 dialects */ + inbuflen = sizeof(struct validate_negotiate_info_req); + } else { + /* otherwise specific dialect was requested */ + vneg_inbuf.Dialects[0] = + cpu_to_le16(tcon->ses->server->vals->protocol_id); + vneg_inbuf.DialectCount = cpu_to_le16(1); + /* structure is big enough for 3 dialects, sending only 1 */ + inbuflen = sizeof(struct validate_negotiate_info_req) - 4; + } rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, FSCTL_VALIDATE_NEGOTIATE_INFO, true /* is_fsctl */, diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 393ed5f4e1b6..6c9653a130c8 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -716,7 +716,7 @@ struct validate_negotiate_info_req { __u8 Guid[SMB2_CLIENT_GUID_SIZE]; __le16 SecurityMode; __le16 DialectCount; - __le16 Dialects[1]; /* dialect (someday maybe list) client asked for */ + __le16 Dialects[3]; /* BB expand this if autonegotiate > 3 dialects */ } __packed; struct validate_negotiate_info_rsp { From 1368f155a972eab037f81b5dea82afd544227552 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 5 Sep 2017 11:24:15 +0200 Subject: [PATCH 046/279] cifs: hide unused functions The newly added SMB2+ attribute support causes unused function warnings when CONFIG_CIFS_XATTR is disabled: fs/cifs/smb2ops.c:563:1: error: 'smb2_set_ea' defined but not used [-Werror=unused-function] smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, fs/cifs/smb2ops.c:513:1: error: 'smb2_query_eas' defined but not used [-Werror=unused-function] smb2_query_eas(const unsigned int xid, struct cifs_tcon *tcon, This adds another #ifdef around the affected functions. Fixes: 5517554e4313 ("cifs: Add support for writing attributes on SMB2+") Fixes: 95907fea4fd8 ("cifs: Add support for reading attributes on SMB2+") Signed-off-by: Arnd Bergmann Acked-by: Geert Uytterhoeven Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 405a9bf13aac..0dafdbae1f8c 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -426,6 +426,7 @@ smb2_query_file_info(const unsigned int xid, struct cifs_tcon *tcon, return rc; } +#ifdef CONFIG_CIFS_XATTR static ssize_t move_smb2_ea_to_cifs(char *dst, size_t dst_size, struct smb2_file_full_ea_info *src, size_t src_size, @@ -613,6 +614,7 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, return rc; } +#endif static bool smb2_can_echo(struct TCP_Server_Info *server) From 94a9daeaece415ce37ccb413b49a580e68e3477b Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 12 Sep 2017 18:13:11 -0500 Subject: [PATCH 047/279] Update version of cifs module Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky --- fs/cifs/cifsfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 30bf89b1fd9a..5a10e566f0e6 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -149,5 +149,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ -#define CIFS_VERSION "2.09" +#define CIFS_VERSION "2.10" #endif /* _CIFSFS_H */ From b8f3911610529ba531b99d1e109c7a40fb071f0a Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 12 Sep 2017 15:10:35 +0200 Subject: [PATCH 048/279] mtd: spi-nor: Check consistency of the memory size extracted from the SFDP One field of the flash parameter table contains information about the flash device size. Most of the time the data extracted from this field is valid, but sometimes the BFPT section of the SFDP table is corrupted or invalid and this field is set to 0xffffffff, thus resulting in an integer overflow when setting params->size. Since NOR devices are anayway always smaller than 2^64 bytes, we can easily stop the BFPT parsing if the size reported in this table is invalid. Fixes: f384b352cbf0 ("mtd: spi-nor: parse Serial Flash Discoverable Parameters (SFDP) tables") Reported-by: Geert Uytterhoeven Signed-off-by: Boris Brezillon Tested-by: Geert Uytterhoeven Acked-by: Cyrille Pitchen --- drivers/mtd/spi-nor/spi-nor.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index cf1d4a15e10a..4425b0283725 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -2127,6 +2127,15 @@ static int spi_nor_parse_bfpt(struct spi_nor *nor, params->size = bfpt.dwords[BFPT_DWORD(2)]; if (params->size & BIT(31)) { params->size &= ~BIT(31); + + /* + * Prevent overflows on params->size. Anyway, a NOR of 2^64 + * bits is unlikely to exist so this error probably means + * the BFPT we are reading is corrupted/wrong. + */ + if (params->size > 63) + return -EINVAL; + params->size = 1ULL << params->size; } else { params->size++; From bfa4133795e5a0badd402dd3f58b13b3cec64a4b Mon Sep 17 00:00:00 2001 From: Cyrille Pitchen Date: Wed, 6 Sep 2017 23:45:02 +0200 Subject: [PATCH 049/279] mtd: spi-nor: fix DMA unsafe buffer issue in spi_nor_read_sfdp() spi_nor_read_sfdp() calls nor->read() to read the SFDP data. When the m25p80 driver is used (pretty common case), nor->read() is then implemented by the m25p80_read() function, which is likely to initialize a 'struct spi_transfer' from its buf argument before appending this structure inside the 'struct spi_message' argument of spi_sync(). Besides the SPI sub-system states that both .tx_buf and .rx_buf members of 'struct spi_transfer' must point into dma-safe memory. However, two of the three calls of spi_nor_read_sfdp() were given pointers to stack allocated memory as buf argument, hence not in a dma-safe area. Hopefully, the third and last call of spi_nor_read_sfdp() was already given a kmalloc'ed buffer argument, hence dma-safe. So this patch fixes this issue by introducing a spi_nor_read_sfdp_dma_unsafe() function which simply wraps the existing spi_nor_read_sfdp() function and uses some kmalloc'ed memory as a bounce buffer. Fixes: f384b352cbf0 ("mtd: spi-nor: parse Serial Flash Discoverable Parameters (SFDP) tables") Reported-by: Geert Uytterhoeven Signed-off-by: Cyrille Pitchen Signed-off-by: Boris Brezillon --- drivers/mtd/spi-nor/spi-nor.c | 36 ++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index 4425b0283725..19c000722cbc 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -1784,7 +1784,7 @@ spi_nor_set_pp_settings(struct spi_nor_pp_command *pp, * @nor: pointer to a 'struct spi_nor' * @addr: offset in the SFDP area to start reading data from * @len: number of bytes to read - * @buf: buffer where the SFDP data are copied into + * @buf: buffer where the SFDP data are copied into (dma-safe memory) * * Whatever the actual numbers of bytes for address and dummy cycles are * for (Fast) Read commands, the Read SFDP (5Ah) instruction is always @@ -1829,6 +1829,36 @@ read_err: return ret; } +/** + * spi_nor_read_sfdp_dma_unsafe() - read Serial Flash Discoverable Parameters. + * @nor: pointer to a 'struct spi_nor' + * @addr: offset in the SFDP area to start reading data from + * @len: number of bytes to read + * @buf: buffer where the SFDP data are copied into + * + * Wrap spi_nor_read_sfdp() using a kmalloc'ed bounce buffer as @buf is now not + * guaranteed to be dma-safe. + * + * Return: -ENOMEM if kmalloc() fails, the return code of spi_nor_read_sfdp() + * otherwise. + */ +static int spi_nor_read_sfdp_dma_unsafe(struct spi_nor *nor, u32 addr, + size_t len, void *buf) +{ + void *dma_safe_buf; + int ret; + + dma_safe_buf = kmalloc(len, GFP_KERNEL); + if (!dma_safe_buf) + return -ENOMEM; + + ret = spi_nor_read_sfdp(nor, addr, len, dma_safe_buf); + memcpy(buf, dma_safe_buf, len); + kfree(dma_safe_buf); + + return ret; +} + struct sfdp_parameter_header { u8 id_lsb; u8 minor; @@ -2101,7 +2131,7 @@ static int spi_nor_parse_bfpt(struct spi_nor *nor, bfpt_header->length * sizeof(u32)); addr = SFDP_PARAM_HEADER_PTP(bfpt_header); memset(&bfpt, 0, sizeof(bfpt)); - err = spi_nor_read_sfdp(nor, addr, len, &bfpt); + err = spi_nor_read_sfdp_dma_unsafe(nor, addr, len, &bfpt); if (err < 0) return err; @@ -2252,7 +2282,7 @@ static int spi_nor_parse_sfdp(struct spi_nor *nor, int i, err; /* Get the SFDP header. */ - err = spi_nor_read_sfdp(nor, 0, sizeof(header), &header); + err = spi_nor_read_sfdp_dma_unsafe(nor, 0, sizeof(header), &header); if (err < 0) return err; From 17b694a5476de09e119c517dcc3b71eff6835bdf Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 3 Sep 2017 13:58:31 +0200 Subject: [PATCH 050/279] mtd: nand: lpc32xx_mlc: Fix an error handling path in lpc32xx_nand_probe() If 'clk_prepare_enable()' fails, we must 'put' the corresponding clock. Fixes: 4d26f012ab59 ("mtd: nand: lpc32xx_mlc: Handle return value of clk_prepare_enable.") Signed-off-by: Christophe JAILLET Signed-off-by: Boris Brezillon --- drivers/mtd/nand/lpc32xx_mlc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/lpc32xx_mlc.c b/drivers/mtd/nand/lpc32xx_mlc.c index c3bb358ef01e..5796468db653 100644 --- a/drivers/mtd/nand/lpc32xx_mlc.c +++ b/drivers/mtd/nand/lpc32xx_mlc.c @@ -707,7 +707,7 @@ static int lpc32xx_nand_probe(struct platform_device *pdev) } res = clk_prepare_enable(host->clk); if (res) - goto err_exit1; + goto err_put_clk; nand_chip->cmd_ctrl = lpc32xx_nand_cmd_ctrl; nand_chip->dev_ready = lpc32xx_nand_device_ready; @@ -814,6 +814,7 @@ err_exit3: dma_release_channel(host->dma_chan); err_exit2: clk_disable_unprepare(host->clk); +err_put_clk: clk_put(host->clk); err_exit1: lpc32xx_wp_enable(host); From e580b8bc4316cbb8bbffb5ed7bf1e477064755ed Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Mon, 18 Sep 2017 09:40:12 +0100 Subject: [PATCH 051/279] arm64: efi: Don't include EFI fpsimd save/restore code in non-EFI kernels __efi_fpsimd_begin()/__efi_fpsimd_end() are for use when making EFI calls only, so using them in non-EFI kernels is not allowed. This patch compiles them out if CONFIG_EFI is not set. Acked-by: Ard Biesheuvel Signed-off-by: Dave Martin Signed-off-by: Catalin Marinas --- arch/arm64/kernel/fpsimd.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 3a68cf38a6b3..f444f374bd7b 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -321,6 +321,8 @@ void kernel_neon_end(void) } EXPORT_SYMBOL(kernel_neon_end); +#ifdef CONFIG_EFI + static DEFINE_PER_CPU(struct fpsimd_state, efi_fpsimd_state); static DEFINE_PER_CPU(bool, efi_fpsimd_state_used); @@ -370,6 +372,8 @@ void __efi_fpsimd_end(void) kernel_neon_end(); } +#endif /* CONFIG_EFI */ + #endif /* CONFIG_KERNEL_MODE_NEON */ #ifdef CONFIG_CPU_PM From c73cc120a33e12e4e254b4b42bc613204ccb923b Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 18 Sep 2017 11:20:19 +0100 Subject: [PATCH 052/279] arm64: relax assembly code alignment from 16 byte to 4 byte Aarch64 instructions must be word aligned. The current 16 byte alignment is more than enough. Relax it into 4 byte alignment. Signed-off-by: Masahiro Yamada Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/linkage.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h index 636c1bced7d4..1b266292f0be 100644 --- a/arch/arm64/include/asm/linkage.h +++ b/arch/arm64/include/asm/linkage.h @@ -1,7 +1,7 @@ #ifndef __ASM_LINKAGE_H #define __ASM_LINKAGE_H -#define __ALIGN .align 4 -#define __ALIGN_STR ".align 4" +#define __ALIGN .align 2 +#define __ALIGN_STR ".align 2" #endif From 3d6a7b99e3fa29b92d6288487e057e0a596bd2b0 Mon Sep 17 00:00:00 2001 From: Andrew Pinski Date: Mon, 18 Sep 2017 11:20:20 +0100 Subject: [PATCH 053/279] arm64: ensure the kernel is compiled for LP64 The kernel needs to be compiled as a LP64 binary for ARM64, even when using a compiler that defaults to code-generation for the ILP32 ABI. Consequently, we need to explicitly pass '-mabi=lp64' (supported on gcc-4.9 and newer). Signed-off-by: Andrew Pinski Signed-off-by: Philipp Tomsich Signed-off-by: Christoph Muellner Signed-off-by: Yury Norov Reviewed-by: David Daney Signed-off-by: Catalin Marinas --- arch/arm64/Makefile | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 9b41f1e3b1a0..939b310913cf 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -50,17 +50,22 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables KBUILD_CFLAGS += $(call cc-option, -mpc-relative-literal-loads) KBUILD_AFLAGS += $(lseinstr) $(brokengasinst) +KBUILD_CFLAGS += $(call cc-option,-mabi=lp64) +KBUILD_AFLAGS += $(call cc-option,-mabi=lp64) + ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) KBUILD_CPPFLAGS += -mbig-endian CHECKFLAGS += -D__AARCH64EB__ AS += -EB LD += -EB +LDFLAGS += -maarch64linuxb UTS_MACHINE := aarch64_be else KBUILD_CPPFLAGS += -mlittle-endian CHECKFLAGS += -D__AARCH64EL__ AS += -EL LD += -EL +LDFLAGS += -maarch64linux UTS_MACHINE := aarch64 endif From 74378c5c8cdaf0ce9f65e67cbd0613286f2c3bad Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 5 Sep 2017 20:16:27 +0200 Subject: [PATCH 054/279] driver core: Fix link to device power management documentation Correct location as of commit 2728b2d2e5be4b82 (PM / core / docs: Convert sleep states API document to reST). Fixes: 2728b2d2e5be4b82 (PM / core / docs: Convert sleep states API document to reST) Signed-off-by: Geert Uytterhoeven Signed-off-by: Rafael J. Wysocki --- include/linux/device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/device.h b/include/linux/device.h index c6f27207dbe8..1d2607923a24 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -838,7 +838,7 @@ struct dev_links_info { * @driver_data: Private pointer for driver specific info. * @links: Links to suppliers and consumers of this device. * @power: For device power management. - * See Documentation/power/admin-guide/devices.rst for details. + * See Documentation/driver-api/pm/devices.rst for details. * @pm_domain: Provide callbacks that are executed during system suspend, * hibernation, system resume and during runtime PM transitions * along with subsystem-level and driver-level callbacks. From 096a2c610df279d001a8f61faa3eb7f98ca6196b Mon Sep 17 00:00:00 2001 From: Rafael Wysocki Date: Fri, 8 Sep 2017 01:20:54 +0200 Subject: [PATCH 055/279] ACPI / PMIC: Add code reviewers to MAINTAINERS Andy and Mika review code changes under drivers/acpi/pmic/ on a regular basis and I rely on their help with that, so add them as code reviwewers for that part of the kernel. Signed-off-by: Rafael J. Wysocki Acked-by: Lee Jones Reviewed-by: Mika Westerberg Reviewed-by: Andy Shevchenko --- MAINTAINERS | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 2281af4b41b6..fa4e916b72e9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -352,6 +352,18 @@ L: linux-acpi@vger.kernel.org S: Maintained F: drivers/acpi/arm64 +ACPI PMIC DRIVERS +M: "Rafael J. Wysocki" +M: Len Brown +R: Andy Shevchenko +R: Mika Westerberg +L: linux-acpi@vger.kernel.org +Q: https://patchwork.kernel.org/project/linux-acpi/list/ +T: git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm +B: https://bugzilla.kernel.org +S: Supported +F: drivers/acpi/pmic/ + ACPI THERMAL DRIVER M: Zhang Rui L: linux-acpi@vger.kernel.org From 41ba8bd0829f5c210715ece8b0f699bed0da8eb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=2E=20Sch=C3=B6nherr?= Date: Tue, 5 Sep 2017 23:14:29 +0200 Subject: [PATCH 056/279] PM / QoS: Use the correct variable to check the QoS request type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the actual function argument for the validation of the request type, instead of the type field in a fresh (supposedly zero-initialized) request structure. Signed-off-by: Jan H. Schönherr Signed-off-by: Rafael J. Wysocki --- drivers/base/power/qos.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c index f850daeffba4..277d43a83f53 100644 --- a/drivers/base/power/qos.c +++ b/drivers/base/power/qos.c @@ -277,11 +277,11 @@ void dev_pm_qos_constraints_destroy(struct device *dev) mutex_unlock(&dev_pm_qos_sysfs_mtx); } -static bool dev_pm_qos_invalid_request(struct device *dev, - struct dev_pm_qos_request *req) +static bool dev_pm_qos_invalid_req_type(struct device *dev, + enum dev_pm_qos_req_type type) { - return !req || (req->type == DEV_PM_QOS_LATENCY_TOLERANCE - && !dev->power.set_latency_tolerance); + return type == DEV_PM_QOS_LATENCY_TOLERANCE && + !dev->power.set_latency_tolerance; } static int __dev_pm_qos_add_request(struct device *dev, @@ -290,7 +290,7 @@ static int __dev_pm_qos_add_request(struct device *dev, { int ret = 0; - if (!dev || dev_pm_qos_invalid_request(dev, req)) + if (!dev || !req || dev_pm_qos_invalid_req_type(dev, type)) return -EINVAL; if (WARN(dev_pm_qos_request_active(req), From 73600b619bf8b9803a0610624a0e11f5ed8f761e Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Sat, 2 Sep 2017 10:49:38 +0200 Subject: [PATCH 057/279] mtd: nand: remove unused blockmask variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fix the following build warning: drivers/mtd/nand/nand_base.c:2671:30: attention : variable ‘blockmask’ set but not used [-Wunused-but-set-variable] Fixes: 0b4773fd1649 ("mtd: nand: Drop unused cached programming support") Signed-off-by: Corentin Labbe Signed-off-by: Boris Brezillon --- drivers/mtd/nand/nand_base.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index bcc8cef1c615..12edaae17d81 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -2668,7 +2668,7 @@ static uint8_t *nand_fill_oob(struct mtd_info *mtd, uint8_t *oob, size_t len, static int nand_do_write_ops(struct mtd_info *mtd, loff_t to, struct mtd_oob_ops *ops) { - int chipnr, realpage, page, blockmask, column; + int chipnr, realpage, page, column; struct nand_chip *chip = mtd_to_nand(mtd); uint32_t writelen = ops->len; @@ -2704,7 +2704,6 @@ static int nand_do_write_ops(struct mtd_info *mtd, loff_t to, realpage = (int)(to >> chip->page_shift); page = realpage & chip->pagemask; - blockmask = (1 << (chip->phys_erase_shift - chip->page_shift)) - 1; /* Invalidate the page cache, when we write to the cached page */ if (to <= ((loff_t)chip->pagebuf << chip->page_shift) && From fe9c1c9555a529bf678148f719c1b9f1730f68a3 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 14 Sep 2017 14:38:58 +0200 Subject: [PATCH 058/279] xen: don't compile pv-specific parts if XEN_PV isn't configured xenbus_client.c contains some functions specific for pv guests. Enclose them with #ifdef CONFIG_XEN_PV to avoid compiling them when they are not needed (e.g. on ARM). Signed-off-by: Juergen Gross Signed-off-by: Boris Ostrovsky --- drivers/xen/xenbus/xenbus_client.c | 130 +++++++++++++++-------------- 1 file changed, 67 insertions(+), 63 deletions(-) diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index 82a8866758ee..a1c17000129b 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -519,64 +519,6 @@ static int __xenbus_map_ring(struct xenbus_device *dev, return err; } -static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev, - grant_ref_t *gnt_refs, - unsigned int nr_grefs, - void **vaddr) -{ - struct xenbus_map_node *node; - struct vm_struct *area; - pte_t *ptes[XENBUS_MAX_RING_GRANTS]; - phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS]; - int err = GNTST_okay; - int i; - bool leaked; - - *vaddr = NULL; - - if (nr_grefs > XENBUS_MAX_RING_GRANTS) - return -EINVAL; - - node = kzalloc(sizeof(*node), GFP_KERNEL); - if (!node) - return -ENOMEM; - - area = alloc_vm_area(XEN_PAGE_SIZE * nr_grefs, ptes); - if (!area) { - kfree(node); - return -ENOMEM; - } - - for (i = 0; i < nr_grefs; i++) - phys_addrs[i] = arbitrary_virt_to_machine(ptes[i]).maddr; - - err = __xenbus_map_ring(dev, gnt_refs, nr_grefs, node->handles, - phys_addrs, - GNTMAP_host_map | GNTMAP_contains_pte, - &leaked); - if (err) - goto failed; - - node->nr_handles = nr_grefs; - node->pv.area = area; - - spin_lock(&xenbus_valloc_lock); - list_add(&node->next, &xenbus_valloc_pages); - spin_unlock(&xenbus_valloc_lock); - - *vaddr = area->addr; - return 0; - -failed: - if (!leaked) - free_vm_area(area); - else - pr_alert("leaking VM area %p size %u page(s)", area, nr_grefs); - - kfree(node); - return err; -} - struct map_ring_valloc_hvm { unsigned int idx; @@ -725,6 +667,65 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr) } EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree); +#ifdef CONFIG_XEN_PV +static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev, + grant_ref_t *gnt_refs, + unsigned int nr_grefs, + void **vaddr) +{ + struct xenbus_map_node *node; + struct vm_struct *area; + pte_t *ptes[XENBUS_MAX_RING_GRANTS]; + phys_addr_t phys_addrs[XENBUS_MAX_RING_GRANTS]; + int err = GNTST_okay; + int i; + bool leaked; + + *vaddr = NULL; + + if (nr_grefs > XENBUS_MAX_RING_GRANTS) + return -EINVAL; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + + area = alloc_vm_area(XEN_PAGE_SIZE * nr_grefs, ptes); + if (!area) { + kfree(node); + return -ENOMEM; + } + + for (i = 0; i < nr_grefs; i++) + phys_addrs[i] = arbitrary_virt_to_machine(ptes[i]).maddr; + + err = __xenbus_map_ring(dev, gnt_refs, nr_grefs, node->handles, + phys_addrs, + GNTMAP_host_map | GNTMAP_contains_pte, + &leaked); + if (err) + goto failed; + + node->nr_handles = nr_grefs; + node->pv.area = area; + + spin_lock(&xenbus_valloc_lock); + list_add(&node->next, &xenbus_valloc_pages); + spin_unlock(&xenbus_valloc_lock); + + *vaddr = area->addr; + return 0; + +failed: + if (!leaked) + free_vm_area(area); + else + pr_alert("leaking VM area %p size %u page(s)", area, nr_grefs); + + kfree(node); + return err; +} + static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr) { struct xenbus_map_node *node; @@ -788,6 +789,12 @@ static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr) return err; } +static const struct xenbus_ring_ops ring_ops_pv = { + .map = xenbus_map_ring_valloc_pv, + .unmap = xenbus_unmap_ring_vfree_pv, +}; +#endif + struct unmap_ring_vfree_hvm { unsigned int idx; @@ -916,11 +923,6 @@ enum xenbus_state xenbus_read_driver_state(const char *path) } EXPORT_SYMBOL_GPL(xenbus_read_driver_state); -static const struct xenbus_ring_ops ring_ops_pv = { - .map = xenbus_map_ring_valloc_pv, - .unmap = xenbus_unmap_ring_vfree_pv, -}; - static const struct xenbus_ring_ops ring_ops_hvm = { .map = xenbus_map_ring_valloc_hvm, .unmap = xenbus_unmap_ring_vfree_hvm, @@ -928,8 +930,10 @@ static const struct xenbus_ring_ops ring_ops_hvm = { void __init xenbus_ring_ops_init(void) { +#ifdef CONFIG_XEN_PV if (!xen_feature(XENFEAT_auto_translated_physmap)) ring_ops = &ring_ops_pv; else +#endif ring_ops = &ring_ops_hvm; } From b0ade85165b3caeb0cd908cffe5921a39f25c243 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 10 Sep 2017 13:41:41 +0200 Subject: [PATCH 059/279] netfilter: nat: Do not use ARRAY_SIZE() on spinlocks to fix zero div MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If no spinlock debugging options (CONFIG_GENERIC_LOCKBREAK, CONFIG_DEBUG_SPINLOCK, CONFIG_DEBUG_LOCK_ALLOC) are enabled on a UP platform (e.g. m68k defconfig), arch_spinlock_t is an empty struct, hence using ARRAY_SIZE(nf_nat_locks) causes a division by zero: net/netfilter/nf_nat_core.c: In function ‘nf_nat_setup_info’: net/netfilter/nf_nat_core.c:432: warning: division by zero net/netfilter/nf_nat_core.c: In function ‘__nf_nat_cleanup_conntrack’: net/netfilter/nf_nat_core.c:535: warning: division by zero net/netfilter/nf_nat_core.c:537: warning: division by zero net/netfilter/nf_nat_core.c: In function ‘nf_nat_init’: net/netfilter/nf_nat_core.c:810: warning: division by zero net/netfilter/nf_nat_core.c:811: warning: division by zero net/netfilter/nf_nat_core.c:824: warning: division by zero Fix this by using the CONNTRACK_LOCKS definition instead. Suggested-by: Florian Westphal Fixes: 8073e960a03bf7b5 ("netfilter: nat: use keyed locks") Signed-off-by: Geert Uytterhoeven Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_nat_core.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index f393a7086025..af8345fc4fbd 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -429,7 +429,7 @@ nf_nat_setup_info(struct nf_conn *ct, srchash = hash_by_src(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - lock = &nf_nat_locks[srchash % ARRAY_SIZE(nf_nat_locks)]; + lock = &nf_nat_locks[srchash % CONNTRACK_LOCKS]; spin_lock_bh(lock); hlist_add_head_rcu(&ct->nat_bysource, &nf_nat_bysource[srchash]); @@ -532,9 +532,9 @@ static void __nf_nat_cleanup_conntrack(struct nf_conn *ct) unsigned int h; h = hash_by_src(nf_ct_net(ct), &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - spin_lock_bh(&nf_nat_locks[h % ARRAY_SIZE(nf_nat_locks)]); + spin_lock_bh(&nf_nat_locks[h % CONNTRACK_LOCKS]); hlist_del_rcu(&ct->nat_bysource); - spin_unlock_bh(&nf_nat_locks[h % ARRAY_SIZE(nf_nat_locks)]); + spin_unlock_bh(&nf_nat_locks[h % CONNTRACK_LOCKS]); } static int nf_nat_proto_clean(struct nf_conn *ct, void *data) @@ -807,8 +807,8 @@ static int __init nf_nat_init(void) /* Leave them the same for the moment. */ nf_nat_htable_size = nf_conntrack_htable_size; - if (nf_nat_htable_size < ARRAY_SIZE(nf_nat_locks)) - nf_nat_htable_size = ARRAY_SIZE(nf_nat_locks); + if (nf_nat_htable_size < CONNTRACK_LOCKS) + nf_nat_htable_size = CONNTRACK_LOCKS; nf_nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 0); if (!nf_nat_bysource) @@ -821,7 +821,7 @@ static int __init nf_nat_init(void) return ret; } - for (i = 0; i < ARRAY_SIZE(nf_nat_locks); i++) + for (i = 0; i < CONNTRACK_LOCKS; i++) spin_lock_init(&nf_nat_locks[i]); nf_ct_helper_expectfn_register(&follow_master_nat); From 7f4f7dd4417d9efd038b14d39c70170db2e0baa0 Mon Sep 17 00:00:00 2001 From: Vishwanath Pai Date: Mon, 11 Sep 2017 21:52:40 +0200 Subject: [PATCH 060/279] netfilter: ipset: ipset list may return wrong member count for set with timeout Simple testcase: $ ipset create test hash:ip timeout 5 $ ipset add test 1.2.3.4 $ ipset add test 1.2.2.2 $ sleep 5 $ ipset l Name: test Type: hash:ip Revision: 5 Header: family inet hashsize 1024 maxelem 65536 timeout 5 Size in memory: 296 References: 0 Number of entries: 2 Members: We return "Number of entries: 2" but no members are listed. That is because mtype_list runs "ip_set_timeout_expired" and does not list the expired entries, but set->elements is never upated (until mtype_gc cleans it up later). Reviewed-by: Joshua Hunt Signed-off-by: Vishwanath Pai Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_gen.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index f236c0bc7b3f..51063d9ed0f7 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -1041,12 +1041,24 @@ out: static int mtype_head(struct ip_set *set, struct sk_buff *skb) { - const struct htype *h = set->data; + struct htype *h = set->data; const struct htable *t; struct nlattr *nested; size_t memsize; u8 htable_bits; + /* If any members have expired, set->elements will be wrong + * mytype_expire function will update it with the right count. + * we do not hold set->lock here, so grab it first. + * set->elements can still be incorrect in the case of a huge set, + * because elements might time out during the listing. + */ + if (SET_WITH_TIMEOUT(set)) { + spin_lock_bh(&set->lock); + mtype_expire(set, h); + spin_unlock_bh(&set->lock); + } + rcu_read_lock_bh(); t = rcu_dereference_bh_nfnl(h->table); memsize = mtype_ahash_memsize(h, t) + set->ext_size; From 63ecc3d9436f8012e49dc846d6cb0a85a3433517 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan Date: Wed, 13 Sep 2017 19:30:51 -0600 Subject: [PATCH 061/279] udpv6: Fix the checksum computation when HW checksum does not apply While trying an ESP transport mode encryption for UDPv6 packets of datagram size 1436 with MTU 1500, checksum error was observed in the secondary fragment. This error occurs due to the UDP payload checksum being missed out when computing the full checksum for these packets in udp6_hwcsum_outgoing(). Fixes: d39d938c8228 ("ipv6: Introduce udpv6_send_skb()") Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: David S. Miller --- net/ipv6/udp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index e2ecfb137297..40d7234c27b9 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1015,6 +1015,7 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, */ offset = skb_transport_offset(skb); skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); + csum = skb->csum; skb->ip_summed = CHECKSUM_NONE; From 265698d7e6132a2d41471135534f4f36ad15b09c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 18 Sep 2017 22:46:36 +0200 Subject: [PATCH 062/279] nl80211: fix null-ptr dereference on invalid mesh configuration If TX rates are specified during mesh join, the channel must also be specified. Check the channel pointer to avoid a null pointer dereference if it isn't. Reported-by: Jouni Malinen Fixes: 8564e38206de ("cfg80211: add checks for beacon rate, extend to mesh") Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index fbd5593e88cb..690874293cfc 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -9987,6 +9987,9 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) if (err) return err; + if (!setup.chandef.chan) + return -EINVAL; + err = validate_beacon_tx_rate(rdev, setup.chandef.chan->band, &setup.beacon_rate); if (err) From 76cc0d3282d4b933fa144fa41fbc5318e0fdca24 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 15 Sep 2017 12:00:07 +0800 Subject: [PATCH 063/279] ip6_gre: skb_push ipv6hdr before packing the header in ip6gre_header Now in ip6gre_header before packing the ipv6 header, it skb_push t->hlen which only includes encap_hlen + tun_hlen. It means greh and inner header would be over written by ipv6 stuff and ipv6h might have no chance to set up. Jianlin found this issue when using remote any on ip6_gre, the packets he captured on gre dev are truncated: 22:50:26.210866 Out ethertype IPv6 (0x86dd), length 120: truncated-ip6 -\ 8128 bytes missing!(flowlabel 0x92f40, hlim 0, next-header Options (0) \ payload length: 8192) ::1:2000:0 > ::1:0:86dd: HBH [trunc] ip-proto-128 \ 8184 It should also skb_push ipv6hdr so that ipv6h points to the right position to set ipv6 stuff up. This patch is to skb_push hlen + sizeof(*ipv6h) and also fix some indents in ip6gre_header. Fixes: c12b395a4664 ("gre: Support GRE over IPv6") Reported-by: Jianlin Shi Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index b7a72d409334..20f66f4c9460 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -940,24 +940,25 @@ done: } static int ip6gre_header(struct sk_buff *skb, struct net_device *dev, - unsigned short type, - const void *daddr, const void *saddr, unsigned int len) + unsigned short type, const void *daddr, + const void *saddr, unsigned int len) { struct ip6_tnl *t = netdev_priv(dev); - struct ipv6hdr *ipv6h = skb_push(skb, t->hlen); - __be16 *p = (__be16 *)(ipv6h+1); + struct ipv6hdr *ipv6h; + __be16 *p; - ip6_flow_hdr(ipv6h, 0, - ip6_make_flowlabel(dev_net(dev), skb, - t->fl.u.ip6.flowlabel, true, - &t->fl.u.ip6)); + ipv6h = skb_push(skb, t->hlen + sizeof(*ipv6h)); + ip6_flow_hdr(ipv6h, 0, ip6_make_flowlabel(dev_net(dev), skb, + t->fl.u.ip6.flowlabel, + true, &t->fl.u.ip6)); ipv6h->hop_limit = t->parms.hop_limit; ipv6h->nexthdr = NEXTHDR_GRE; ipv6h->saddr = t->parms.laddr; ipv6h->daddr = t->parms.raddr; - p[0] = t->parms.o_flags; - p[1] = htons(type); + p = (__be16 *)(ipv6h + 1); + p[0] = t->parms.o_flags; + p[1] = htons(type); /* * Set the source hardware address. From 7b4dc3c0da0d66e7b20a826c537d41bb73e4df54 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Fri, 18 Aug 2017 17:49:58 +0800 Subject: [PATCH 064/279] drm/i915/gvt: Fix incorrect PCI BARs reporting Looking at our virtual PCI device, we can see surprising Region 4 and Region 5. 00:10.0 VGA compatible controller: Intel Corporation Sky Lake Integrated Graphics (rev 06) (prog-if 00 [VGA controller]) .... Region 0: Memory at 140000000 (64-bit, non-prefetchable) [size=16M] Region 2: Memory at 180000000 (64-bit, prefetchable) [size=1G] Region 4: Memory at (32-bit, non-prefetchable) Region 5: Memory at (32-bit, non-prefetchable) Expansion ROM at febd6000 [disabled] [size=2K] The fact is that we only implemented BAR0 and BAR2. Surprising Region 4 and Region 5 are shown because we report their size as 0xffffffff. They should report size 0 instead. BTW, the physical GPU has a PIO BAR. GVTg hasn't implemented PIO access, so we ignored this BAR for vGPU device. v2: fix BAR size value calculation. Link: https://bugzilla.redhat.com/show_bug.cgi?id=1458032 Signed-off-by: Changbin Du Cc: stable@vger.kernel.org Signed-off-by: Zhenyu Wang (cherry picked from commit f1751362d6357a90bc6e53176cec715ff2dbed74) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gvt/cfg_space.c | 113 ++++++++++++--------------- 1 file changed, 48 insertions(+), 65 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cfg_space.c b/drivers/gpu/drm/i915/gvt/cfg_space.c index 40af17ec6312..ff3154fe6588 100644 --- a/drivers/gpu/drm/i915/gvt/cfg_space.c +++ b/drivers/gpu/drm/i915/gvt/cfg_space.c @@ -197,78 +197,65 @@ static int emulate_pci_command_write(struct intel_vgpu *vgpu, static int emulate_pci_bar_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { - unsigned int bar_index = - (rounddown(offset, 8) % PCI_BASE_ADDRESS_0) / 8; u32 new = *(u32 *)(p_data); bool lo = IS_ALIGNED(offset, 8); u64 size; int ret = 0; bool mmio_enabled = vgpu_cfg_space(vgpu)[PCI_COMMAND] & PCI_COMMAND_MEMORY; + struct intel_vgpu_pci_bar *bars = vgpu->cfg_space.bar; - if (WARN_ON(bar_index >= INTEL_GVT_PCI_BAR_MAX)) - return -EINVAL; - + /* + * Power-up software can determine how much address + * space the device requires by writing a value of + * all 1's to the register and then reading the value + * back. The device will return 0's in all don't-care + * address bits. + */ if (new == 0xffffffff) { - /* - * Power-up software can determine how much address - * space the device requires by writing a value of - * all 1's to the register and then reading the value - * back. The device will return 0's in all don't-care - * address bits. - */ - size = vgpu->cfg_space.bar[bar_index].size; - if (lo) { - new = rounddown(new, size); - } else { - u32 val = vgpu_cfg_space(vgpu)[rounddown(offset, 8)]; - /* for 32bit mode bar it returns all-0 in upper 32 - * bit, for 64bit mode bar it will calculate the - * size with lower 32bit and return the corresponding - * value + switch (offset) { + case PCI_BASE_ADDRESS_0: + case PCI_BASE_ADDRESS_1: + size = ~(bars[INTEL_GVT_PCI_BAR_GTTMMIO].size -1); + intel_vgpu_write_pci_bar(vgpu, offset, + size >> (lo ? 0 : 32), lo); + /* + * Untrap the BAR, since guest hasn't configured a + * valid GPA */ - if (val & PCI_BASE_ADDRESS_MEM_TYPE_64) - new &= (~(size-1)) >> 32; - else - new = 0; - } - /* - * Unmapp & untrap the BAR, since guest hasn't configured a - * valid GPA - */ - switch (bar_index) { - case INTEL_GVT_PCI_BAR_GTTMMIO: ret = trap_gttmmio(vgpu, false); break; - case INTEL_GVT_PCI_BAR_APERTURE: + case PCI_BASE_ADDRESS_2: + case PCI_BASE_ADDRESS_3: + size = ~(bars[INTEL_GVT_PCI_BAR_APERTURE].size -1); + intel_vgpu_write_pci_bar(vgpu, offset, + size >> (lo ? 0 : 32), lo); ret = map_aperture(vgpu, false); break; + default: + /* Unimplemented BARs */ + intel_vgpu_write_pci_bar(vgpu, offset, 0x0, false); } - intel_vgpu_write_pci_bar(vgpu, offset, new, lo); } else { - /* - * Unmapp & untrap the old BAR first, since guest has - * re-configured the BAR - */ - switch (bar_index) { - case INTEL_GVT_PCI_BAR_GTTMMIO: - ret = trap_gttmmio(vgpu, false); + switch (offset) { + case PCI_BASE_ADDRESS_0: + case PCI_BASE_ADDRESS_1: + /* + * Untrap the old BAR first, since guest has + * re-configured the BAR + */ + trap_gttmmio(vgpu, false); + intel_vgpu_write_pci_bar(vgpu, offset, new, lo); + ret = trap_gttmmio(vgpu, mmio_enabled); break; - case INTEL_GVT_PCI_BAR_APERTURE: - ret = map_aperture(vgpu, false); + case PCI_BASE_ADDRESS_2: + case PCI_BASE_ADDRESS_3: + map_aperture(vgpu, false); + intel_vgpu_write_pci_bar(vgpu, offset, new, lo); + ret = map_aperture(vgpu, mmio_enabled); break; - } - intel_vgpu_write_pci_bar(vgpu, offset, new, lo); - /* Track the new BAR */ - if (mmio_enabled) { - switch (bar_index) { - case INTEL_GVT_PCI_BAR_GTTMMIO: - ret = trap_gttmmio(vgpu, true); - break; - case INTEL_GVT_PCI_BAR_APERTURE: - ret = map_aperture(vgpu, true); - break; - } + default: + intel_vgpu_write_pci_bar(vgpu, offset, new, lo); } } return ret; @@ -299,10 +286,7 @@ int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset, } switch (rounddown(offset, 4)) { - case PCI_BASE_ADDRESS_0: - case PCI_BASE_ADDRESS_1: - case PCI_BASE_ADDRESS_2: - case PCI_BASE_ADDRESS_3: + case PCI_BASE_ADDRESS_0 ... PCI_BASE_ADDRESS_5: if (WARN_ON(!IS_ALIGNED(offset, 4))) return -EINVAL; return emulate_pci_bar_write(vgpu, offset, p_data, bytes); @@ -344,7 +328,6 @@ void intel_vgpu_init_cfg_space(struct intel_vgpu *vgpu, struct intel_gvt *gvt = vgpu->gvt; const struct intel_gvt_device_info *info = &gvt->device_info; u16 *gmch_ctl; - int i; memcpy(vgpu_cfg_space(vgpu), gvt->firmware.cfg_space, info->cfg_space_size); @@ -371,13 +354,13 @@ void intel_vgpu_init_cfg_space(struct intel_vgpu *vgpu, */ memset(vgpu_cfg_space(vgpu) + PCI_BASE_ADDRESS_1, 0, 4); memset(vgpu_cfg_space(vgpu) + PCI_BASE_ADDRESS_3, 0, 4); + memset(vgpu_cfg_space(vgpu) + PCI_BASE_ADDRESS_4, 0, 8); memset(vgpu_cfg_space(vgpu) + INTEL_GVT_PCI_OPREGION, 0, 4); - for (i = 0; i < INTEL_GVT_MAX_BAR_NUM; i++) { - vgpu->cfg_space.bar[i].size = pci_resource_len( - gvt->dev_priv->drm.pdev, i * 2); - vgpu->cfg_space.bar[i].tracked = false; - } + vgpu->cfg_space.bar[INTEL_GVT_PCI_BAR_GTTMMIO].size = + pci_resource_len(gvt->dev_priv->drm.pdev, 0); + vgpu->cfg_space.bar[INTEL_GVT_PCI_BAR_APERTURE].size = + pci_resource_len(gvt->dev_priv->drm.pdev, 2); } /** From 814feed316e9d15b0ae869ca729370e7630b8d13 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 10 Sep 2017 10:56:42 +0200 Subject: [PATCH 065/279] drm/i915: Fix an error handling in 'intel_framebuffer_init()' We should go through the error handling path to decrease the 'framebuffer_references' as done everywhere else in this function. Fixes: 2e2adb05736c ("drm/i915: Add render decompression support") Signed-off-by: Christophe JAILLET Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20170910085642.13673-1-christophe.jaillet@wanadoo.fr (cherry picked from commit 37875d6b3af702425ce292de81b77faf94766616) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index f17275519484..00cd17c76fdc 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14030,7 +14030,7 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, if (mode_cmd->handles[i] != mode_cmd->handles[0]) { DRM_DEBUG_KMS("bad plane %d handle\n", i); - return -EINVAL; + goto err; } stride_alignment = intel_fb_stride_alignment(fb, i); From ac73661c6222faef1a97ec44f424234f165e4710 Mon Sep 17 00:00:00 2001 From: "Lee, Shawn C" Date: Tue, 12 Sep 2017 11:36:30 +0800 Subject: [PATCH 066/279] drm/i915/bxt: set min brightness from VBT Min brightness value from vbt was missing for BXT platform. This setting have to refer backlight ic spec to restrict min backlight output. Without this restriction, driver would allow to configure lower brightness value and violate backlight ic requirement. Fixes: 0fb890c01349 ("drm/i915/bxt: BLC implementation") Cc: Jani Nikula Cc: Cooper Chiou Cc: Gary C Wang Signed-off-by: Shawn Lee Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1505187390-7039-1-git-send-email-shawn.c.lee@intel.com (cherry picked from commit c3881128cb672cf484a52fbb36b5daa9044d9168) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_panel.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index a17b1de7d7e0..d4dd248ac9a8 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -1699,6 +1699,8 @@ bxt_setup_backlight(struct intel_connector *connector, enum pipe unused) if (!panel->backlight.max) return -ENODEV; + panel->backlight.min = get_backlight_min_vbt(connector); + val = bxt_get_backlight(connector); val = intel_panel_compute_brightness(connector, val); panel->backlight.level = clamp(val, panel->backlight.min, From abeae421b03d800d33894df7fbca6d00c70c358e Mon Sep 17 00:00:00 2001 From: Uma Shankar Date: Tue, 5 Sep 2017 15:14:31 +0530 Subject: [PATCH 067/279] Revert "drm/i915/bxt: Disable device ready before shutdown command" This reverts commit bbdf0b2ff32a ("drm/i915/bxt: Disable device ready before shutdown command"). Disable device ready before shutdown command was added previously to avoid a split screen issue seen on dual link DSI panels. As of now, dual link is not supported and will need some rework in the upstream code. For single link DSI panels, the change is not required. This will cause failure in sending SHUTDOWN packet during disable. Hence reverting the change. Will handle the change as part of dual link enabling in upstream. Fixes: bbdf0b2ff32a ("drm/i915/bxt: Disable device ready before shutdown command") Cc: # v4.12+ Signed-off-by: Uma Shankar Signed-off-by: Vidya Srinivas Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1504604671-17237-1-git-send-email-vidya.srinivas@intel.com (cherry picked from commit 33c8d8870c67faf3161898a56af98ac3c1c71450) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_dsi.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index f0c11aec5ea5..7442891762be 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -892,8 +892,6 @@ static void intel_dsi_disable(struct intel_encoder *encoder, struct intel_crtc_state *old_crtc_state, struct drm_connector_state *old_conn_state) { - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = dev->dev_private; struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); enum port port; @@ -902,15 +900,6 @@ static void intel_dsi_disable(struct intel_encoder *encoder, intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_OFF); intel_panel_disable_backlight(old_conn_state); - /* - * Disable Device ready before the port shutdown in order - * to avoid split screen - */ - if (IS_BROXTON(dev_priv)) { - for_each_dsi_port(port, intel_dsi->ports) - I915_WRITE(MIPI_DEVICE_READY(port), 0); - } - /* * According to the spec we should send SHUTDOWN before * MIPI_SEQ_DISPLAY_OFF only for v3+ VBTs, but field testing From 8c7a758873577f0d1bb3f879584338f73e6c6bc3 Mon Sep 17 00:00:00 2001 From: "Lee, Shawn C" Date: Wed, 13 Sep 2017 13:19:20 +0800 Subject: [PATCH 068/279] drm/i915/cnp: set min brightness from VBT Min brightness value from vbt was missing for CNP platform. This setting have to refer backlight ic spec to restrict min backlight output. Without this restriction, driver would allow to configure lower brightness value and violate backlight ic requirement. Fixes: 4c9f7086ac6d ("drm/i915/cnp: Backlight support for CNP.") Cc: Jani Nikula Signed-off-by: Shawn Lee Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/1505279961-16140-1-git-send-email-shawn.c.lee@intel.com (cherry picked from commit f44e354f857f207cd361269c5e38e1f96e0b616c) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_panel.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index d4dd248ac9a8..3b1c5d783ee7 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -1737,6 +1737,8 @@ cnp_setup_backlight(struct intel_connector *connector, enum pipe unused) if (!panel->backlight.max) return -ENODEV; + panel->backlight.min = get_backlight_min_vbt(connector); + val = bxt_get_backlight(connector); val = intel_panel_compute_brightness(connector, val); panel->backlight.level = clamp(val, panel->backlight.min, From 99df13b6ea811a63eeacb278d05a5b914ce28073 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 14 Sep 2017 17:42:13 +0100 Subject: [PATCH 069/279] drm/i915: Remove unused 'in_vbl' from i915_get_crtc_scanoutpos() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 1bf6ad622b9b ("drm/vblank: drop the mode argument from drm_calc_vbltimestamp_from_scanoutpos") removed the use of in_vbl, but did not remove the local variable. Do so now. Fixes: 1bf6ad622b9b ("drm/vblank: drop the mode argument from drm_calc_vbltimestamp_from_scanoutpos") Signed-off-by: Chris Wilson Cc: Ville Syrjälä Cc: Daniel Vetter Cc: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20170914164213.18461-1-chris@chris-wilson.co.uk Reviewed-by: Ville Syrjälä (cherry picked from commit e01e71fc49d4c95090a04f898a3fe788c652a04b) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_irq.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index e21ce9c18b6e..b63893eeca73 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -839,7 +839,6 @@ static bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, pipe); int position; int vbl_start, vbl_end, hsync_start, htotal, vtotal; - bool in_vbl = true; unsigned long irqflags; if (WARN_ON(!mode->crtc_clock)) { @@ -922,8 +921,6 @@ static bool i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); - in_vbl = position >= vbl_start && position < vbl_end; - /* * While in vblank, position will be negative * counting up towards 0 at vbl_end. And outside From f2654a4781318dc7ab8d6cde66f1fa39eab980a9 Mon Sep 17 00:00:00 2001 From: Fahad Kunnathadi Date: Fri, 15 Sep 2017 12:01:58 +0530 Subject: [PATCH 070/279] net: phy: Fix mask value write on gmii2rgmii converter speed register To clear Speed Selection in MDIO control register(0x10), ie, clear bits 6 and 13 to zero while keeping other bits same. Before AND operation,The Mask value has to be perform with bitwise NOT operation (ie, ~ operator) This patch clears current speed selection before writing the new speed settings to gmii2rgmii converter Fixes: f411a6160bd4 ("net: phy: Add gmiitorgmii converter support") Signed-off-by: Fahad Kunnathadi Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/xilinx_gmii2rgmii.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/xilinx_gmii2rgmii.c b/drivers/net/phy/xilinx_gmii2rgmii.c index d15dd3938ba8..2e5150b0b8d5 100644 --- a/drivers/net/phy/xilinx_gmii2rgmii.c +++ b/drivers/net/phy/xilinx_gmii2rgmii.c @@ -44,7 +44,7 @@ static int xgmiitorgmii_read_status(struct phy_device *phydev) priv->phy_drv->read_status(phydev); val = mdiobus_read(phydev->mdio.bus, priv->addr, XILINX_GMII2RGMII_REG); - val &= XILINX_GMII2RGMII_SPEED_MASK; + val &= ~XILINX_GMII2RGMII_SPEED_MASK; if (phydev->speed == SPEED_1000) val |= BMCR_SPEED1000; From 8c22dab03ad072e45060c299c70d02a4f6fc4aab Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 15 Sep 2017 15:58:33 +0800 Subject: [PATCH 071/279] ip6_tunnel: do not allow loading ip6_tunnel if ipv6 is disabled in cmdline If ipv6 has been disabled from cmdline since kernel started, it makes no sense to allow users to create any ip6 tunnel. Otherwise, it could some potential problem. Jianlin found a kernel crash caused by this in ip6_gre when he set ipv6.disable=1 in grub: [ 209.588865] Unable to handle kernel paging request for data at address 0x00000080 [ 209.588872] Faulting instruction address: 0xc000000000a3aa6c [ 209.588879] Oops: Kernel access of bad area, sig: 11 [#1] [ 209.589062] NIP [c000000000a3aa6c] fib_rules_lookup+0x4c/0x260 [ 209.589071] LR [c000000000b9ad90] fib6_rule_lookup+0x50/0xb0 [ 209.589076] Call Trace: [ 209.589097] fib6_rule_lookup+0x50/0xb0 [ 209.589106] rt6_lookup+0xc4/0x110 [ 209.589116] ip6gre_tnl_link_config+0x214/0x2f0 [ip6_gre] [ 209.589125] ip6gre_newlink+0x138/0x3a0 [ip6_gre] [ 209.589134] rtnl_newlink+0x798/0xb80 [ 209.589142] rtnetlink_rcv_msg+0xec/0x390 [ 209.589151] netlink_rcv_skb+0x138/0x150 [ 209.589159] rtnetlink_rcv+0x48/0x70 [ 209.589169] netlink_unicast+0x538/0x640 [ 209.589175] netlink_sendmsg+0x40c/0x480 [ 209.589184] ___sys_sendmsg+0x384/0x4e0 [ 209.589194] SyS_sendmsg+0xd4/0x140 [ 209.589201] SyS_socketcall+0x3e0/0x4f0 [ 209.589209] system_call+0x38/0xe0 This patch is to return -EOPNOTSUPP in ip6_tunnel_init if ipv6 has been disabled from cmdline. Reported-by: Jianlin Shi Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index ae73164559d5..f2f21c24915f 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -2259,6 +2259,9 @@ static int __init ip6_tunnel_init(void) { int err; + if (!ipv6_mod_enabled()) + return -EOPNOTSUPP; + err = register_pernet_device(&ip6_tnl_net_ops); if (err < 0) goto out_pernet; From 3ff4cbec87da48b0ec1f7b6196607b034de0c680 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Sat, 16 Sep 2017 14:02:21 +0200 Subject: [PATCH 072/279] net/sched: cls_matchall: fix crash when used with classful qdisc this script, edited from Linux Advanced Routing and Traffic Control guide tc q a dev en0 root handle 1: htb default a tc c a dev en0 parent 1: classid 1:1 htb rate 6mbit burst 15k tc c a dev en0 parent 1:1 classid 1:a htb rate 5mbit ceil 6mbit burst 15k tc c a dev en0 parent 1:1 classid 1:b htb rate 1mbit ceil 6mbit burst 15k tc f a dev en0 parent 1:0 prio 1 $clsname $clsargs classid 1:b ping $address -c1 tc -s c s dev en0 classifies traffic to 1:b or 1:a, depending on whether the packet matches or not the pattern $clsargs of filter $clsname. However, when $clsname is 'matchall', a systematic crash can be observed in htb_classify(). HTB and classful qdiscs don't assign initial value to struct tcf_result, but then they expect it to contain valid values after filters have been run. Thus, current 'matchall' ignores the TCA_MATCHALL_CLASSID attribute, configured by user, and makes HTB (and classful qdiscs) dereference random pointers. By assigning head->res to *res in mall_classify(), before the actions are invoked, we fix this crash and enable TCA_MATCHALL_CLASSID functionality, that had no effect on 'matchall' classifier since its first introduction. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1460213 Reported-by: Jiri Benc Fixes: b87f7936a932 ("net/sched: introduce Match-all classifier") Signed-off-by: Davide Caratti Acked-by: Yotam Gigi Signed-off-by: David S. Miller --- net/sched/cls_matchall.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 21cc45caf842..eeac606c95ab 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -32,6 +32,7 @@ static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp, if (tc_skip_sw(head->flags)) return -1; + *res = head->res; return tcf_exts_exec(skb, &head->exts, res); } From 51513748ddfe7a5d2812158a1e0570499b0c511c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 16 Sep 2017 13:10:06 -0700 Subject: [PATCH 073/279] Documentation: networking: fix ASCII art in switchdev.txt Fix ASCII art in Documentation/networking/switchdev.txt: Change non-ASCII "spaces" to ASCII spaces. Change 2 erroneous '+' characters in ASCII art to '-' (at the '*' characters below): line 32: +--+----+----+----+-*--+----+---+ +-----+-----+ line 41: +--------------+---*------------+ Signed-off-by: Randy Dunlap Acked-by: Pavel Machek Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- Documentation/networking/switchdev.txt | 64 +++++++++++++------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt index 5e40e1f68873..82236a17b5e6 100644 --- a/Documentation/networking/switchdev.txt +++ b/Documentation/networking/switchdev.txt @@ -13,42 +13,42 @@ an example setup using a data-center-class switch ASIC chip. Other setups with SR-IOV or soft switches, such as OVS, are possible. -                             User-space tools + User-space tools -       user space                   | -      +-------------------------------------------------------------------+ -       kernel                       | Netlink -                                    | -                     +--------------+-------------------------------+ -                     |         Network stack                        | -                     |           (Linux)                            | -                     |                                              | -                     +----------------------------------------------+ + user space | + +-------------------------------------------------------------------+ + kernel | Netlink + | + +--------------+-------------------------------+ + | Network stack | + | (Linux) | + | | + +----------------------------------------------+ sw1p2 sw1p4 sw1p6 -                      sw1p1  + sw1p3 +  sw1p5 +         eth1 -                        +    |    +    |    +    |            + -                        |    |    |    |    |    |            | -                     +--+----+----+----+-+--+----+---+  +-----+-----+ -                     |         Switch driver         |  |    mgmt   | -                     |        (this document)        |  |   driver  | -                     |                               |  |           | -                     +--------------+----------------+  +-----------+ -                                    | -       kernel                       | HW bus (eg PCI) -      +-------------------------------------------------------------------+ -       hardware                     | -                     +--------------+---+------------+ -                     |         Switch device (sw1)   | -                     |  +----+                       +--------+ -                     |  |    v offloaded data path   | mgmt port -                     |  |    |                       | -                     +--|----|----+----+----+----+---+ -                        |    |    |    |    |    | -                        +    +    +    +    +    + -                       p1   p2   p3   p4   p5   p6 + sw1p1 + sw1p3 + sw1p5 + eth1 + + | + | + | + + | | | | | | | + +--+----+----+----+----+----+---+ +-----+-----+ + | Switch driver | | mgmt | + | (this document) | | driver | + | | | | + +--------------+----------------+ +-----------+ + | + kernel | HW bus (eg PCI) + +-------------------------------------------------------------------+ + hardware | + +--------------+----------------+ + | Switch device (sw1) | + | +----+ +--------+ + | | v offloaded data path | mgmt port + | | | | + +--|----|----+----+----+----+---+ + | | | | | | + + + + + + + + p1 p2 p3 p4 p5 p6 -                             front-panel ports + front-panel ports Fig 1. From 6ce14f6416c84bd9c81777edf899b57ac5000c87 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 19 Sep 2017 01:49:02 +0200 Subject: [PATCH 074/279] ACPI / watchdog: properly initialize resources We copy a local resource structure into a list, but only initialize some of its members, as pointed out by gcc-4.4: drivers/acpi/acpi_watchdog.c: In function 'acpi_watchdog_init': drivers/acpi/acpi_watchdog.c:105: error: 'res.child' may be used uninitialized in this function drivers/acpi/acpi_watchdog.c:105: error: 'res.sibling' may be used uninitialized in this function drivers/acpi/acpi_watchdog.c:105: error: 'res.parent' may be used uninitialized in this function drivers/acpi/acpi_watchdog.c:105: error: 'res.desc' may be used uninitialized in this function drivers/acpi/acpi_watchdog.c:105: error: 'res.name' may be used uninitialized in this function Newer compilers can presumably optimize the uninitialized access away entirely and don't warn at all, but rely on the kzalloc() to zero the structure first. This adds an explicit initialization to force consistent behavior. Fixes: 058dfc767008 (ACPI / watchdog: Add support for WDAT hardware watchdog) Signed-off-by: Arnd Bergmann Acked-by: Guenter Roeck Acked-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_watchdog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/acpi_watchdog.c b/drivers/acpi/acpi_watchdog.c index bf22c29d2517..11b113f8e367 100644 --- a/drivers/acpi/acpi_watchdog.c +++ b/drivers/acpi/acpi_watchdog.c @@ -66,7 +66,7 @@ void __init acpi_watchdog_init(void) for (i = 0; i < wdat->entries; i++) { const struct acpi_generic_address *gas; struct resource_entry *rentry; - struct resource res; + struct resource res = {}; bool found; gas = &entries[i].register_region; From 1e3c5ec66119783440ed211ae527674651affa9b Mon Sep 17 00:00:00 2001 From: Sathya Perla Date: Mon, 18 Sep 2017 17:05:37 +0530 Subject: [PATCH 075/279] bnxt_en: check for ingress qdisc in flower offload Check for ingress-only qdisc for flower offload, as other qdiscs are not supported for flower offload. Suggested-by: Jiri Pirko Signed-off-by: Sathya Perla Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index ccd699fb2d70..7dd3d131043a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -750,6 +750,10 @@ int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid, { int rc = 0; + if (!is_classid_clsact_ingress(cls_flower->common.classid) || + cls_flower->common.chain_index) + return -EOPNOTSUPP; + switch (cls_flower->command) { case TC_CLSFLOWER_REPLACE: rc = bnxt_tc_add_flow(bp, src_fid, cls_flower); From 582db7e0c4c2fc5bb4f932f268035883385e3692 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Mon, 18 Sep 2017 15:03:46 +0200 Subject: [PATCH 076/279] bpf: devmap: pass on return value of bpf_map_precharge_memlock If bpf_map_precharge_memlock in dev_map_alloc, -ENOMEM is returned regardless of the actual error produced by bpf_map_precharge_memlock. Fix it by passing on the error returned by bpf_map_precharge_memlock. Also return -EINVAL instead of -ENOMEM if the page count overflow check fails. This makes dev_map_alloc match the behavior of other bpf maps' alloc functions wrt. return values. Signed-off-by: Tobias Klauser Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/devmap.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 959c9a07f318..e093d9a2c4dd 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -75,8 +75,8 @@ static u64 dev_map_bitmap_size(const union bpf_attr *attr) static struct bpf_map *dev_map_alloc(union bpf_attr *attr) { struct bpf_dtab *dtab; + int err = -EINVAL; u64 cost; - int err; /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 4 || @@ -108,6 +108,8 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr) if (err) goto free_dtab; + err = -ENOMEM; + /* A per cpu bitfield with a bit per possible net device */ dtab->flush_needed = __alloc_percpu(dev_map_bitmap_size(attr), __alignof__(unsigned long)); @@ -128,7 +130,7 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr) free_dtab: free_percpu(dtab->flush_needed); kfree(dtab); - return ERR_PTR(-ENOMEM); + return ERR_PTR(err); } static void dev_map_free(struct bpf_map *map) From 5e75fe3927559505682b0053b5745e3f42d66e8c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 18 Sep 2017 17:19:10 -0700 Subject: [PATCH 077/279] tools/testing/nvdimm: disable labels for nfit_test.1 Improve coverage of NVDIMM-N test scenarios by providing a test bus incapable of label operations. Signed-off-by: Dan Williams --- tools/testing/nvdimm/test/nfit.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index d20791c3f499..bef419d4266d 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -1527,9 +1527,6 @@ static void nfit_test1_setup(struct nfit_test *t) set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en); - set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_cmd_force_en); - set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en); - set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en); } static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa, From 4c7124413aa759b8ea0b90cd39177e525396e662 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Mon, 18 Sep 2017 11:05:16 -0700 Subject: [PATCH 078/279] tcp: remove two unused functions remove tcp_may_send_now and tcp_snd_test that are no longer used Fixes: 840a3cbe8969 ("tcp: remove forward retransmit feature") Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 1 - net/ipv4/tcp_output.c | 34 ---------------------------------- 2 files changed, 35 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index b510f284427a..3bc910a9bfc6 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -544,7 +544,6 @@ u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, int min_tso_segs); void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, int nonagle); -bool tcp_may_send_now(struct sock *sk); int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs); int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs); void tcp_retransmit_timer(struct sock *sk); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 1c839c99114c..517d737059d1 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1806,40 +1806,6 @@ static bool tcp_snd_wnd_test(const struct tcp_sock *tp, return !after(end_seq, tcp_wnd_end(tp)); } -/* This checks if the data bearing packet SKB (usually tcp_send_head(sk)) - * should be put on the wire right now. If so, it returns the number of - * packets allowed by the congestion window. - */ -static unsigned int tcp_snd_test(const struct sock *sk, struct sk_buff *skb, - unsigned int cur_mss, int nonagle) -{ - const struct tcp_sock *tp = tcp_sk(sk); - unsigned int cwnd_quota; - - tcp_init_tso_segs(skb, cur_mss); - - if (!tcp_nagle_test(tp, skb, cur_mss, nonagle)) - return 0; - - cwnd_quota = tcp_cwnd_test(tp, skb); - if (cwnd_quota && !tcp_snd_wnd_test(tp, skb, cur_mss)) - cwnd_quota = 0; - - return cwnd_quota; -} - -/* Test if sending is allowed right now. */ -bool tcp_may_send_now(struct sock *sk) -{ - const struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb = tcp_send_head(sk); - - return skb && - tcp_snd_test(sk, skb, tcp_current_mss(sk), - (tcp_skb_is_last(sk, skb) ? - tp->nonagle : TCP_NAGLE_PUSH)); -} - /* Trim TSO SKB to LEN bytes, put the remaining data into a new packet * which is put after SKB on the list. It is very much like * tcp_fragment() except that it may make several kinds of assumptions From 33a56086712561b8b9cdc881e0317f4c36861f72 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 18 Sep 2017 14:48:58 -0700 Subject: [PATCH 079/279] libnvdimm, namespace: fix btt claim class crash Maurice reports: BUG: unable to handle kernel NULL pointer dereference at 0000000000000028 IP: holder_class_store+0x253/0x2b0 [libnvdimm] ...while trying to reconfigure an NVDIMM-N namespace into 'sector' / 'btt' mode. The crash points to this line: (gdb) li *(holder_class_store+0x253) 0x7773 is in holder_class_store (drivers/nvdimm/namespace_devs.c:1420). 1415 for (i = 0; i < nd_region->ndr_mappings; i++) { 1416 struct nd_mapping *nd_mapping = &nd_region->mapping[i]; 1417 struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); 1418 struct nd_namespace_index *nsindex; 1419 1420 nsindex = to_namespace_index(ndd, ndd->ns_current); ...where we are failing because ndd is NULL due to NVDIMM-N dimms not supporting labels. Long story short, default to the BTTv1 format in the label-less / NVDIMM-N case. Fixes: 14e494542636 ("libnvdimm, btt: BTT updates for UEFI 2.7 format") Cc: Cc: Vishal Verma Reported-by: Maurice A. Saldivar Tested-by: Maurice A. Saldivar Signed-off-by: Dan Williams --- drivers/nvdimm/namespace_devs.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 1427a386a033..3e4d1e7998da 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -1417,6 +1417,15 @@ static int btt_claim_class(struct device *dev) struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); struct nd_namespace_index *nsindex; + /* + * If any of the DIMMs do not support labels the only + * possible BTT format is v1. + */ + if (!ndd) { + loop_bitmask = 0; + break; + } + nsindex = to_namespace_index(ndd, ndd->ns_current); if (nsindex == NULL) loop_bitmask |= 1; From 54640d238760a1a54dfebe039b49682522100186 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 18 Sep 2017 22:51:14 -0500 Subject: [PATCH 080/279] fcntl: Don't set si_code to SI_SIGIO when sig == SIGPOLL When fixing things to avoid ambiguous cases I had a thinko and included SIGPOLL/SIGIO in with all of the other signals that have signal specific si_codes. Which is completely wrong. Fix that. Reported-by: Vince Weaver Signed-off-by: "Eric W. Biederman" --- fs/fcntl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fcntl.c b/fs/fcntl.c index 0491da3b28c3..448a1119f0be 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -749,7 +749,7 @@ static void send_sigio_to_task(struct task_struct *p, * specific si_codes. In that case use SI_SIGIO instead * to remove the ambiguity. */ - if (sig_specific_sicodes(signum)) + if ((signum != SIGPOLL) && sig_specific_sicodes(signum)) si.si_code = SI_SIGIO; /* Make sure we are called with one of the POLL_* From 129c6cda2de2a8ac44fab096152469999b727faf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 18 Sep 2017 13:03:43 -0700 Subject: [PATCH 081/279] 8139too: revisit napi_complete_done() usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It seems we have to be more careful in napi_complete_done() use. This patch is not a revert, as it seems we can avoid bug that Ville reported by moving the napi_complete_done() test in the spinlock section. Many thanks to Ville for detective work and all tests. Fixes: 617f01211baf ("8139too: use napi_complete_done()") Reported-by: Ville Syrjälä Tested-by: Ville Syrjälä Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/8139too.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c index ca22f2898664..d24b47b8e0b2 100644 --- a/drivers/net/ethernet/realtek/8139too.c +++ b/drivers/net/ethernet/realtek/8139too.c @@ -2135,11 +2135,12 @@ static int rtl8139_poll(struct napi_struct *napi, int budget) if (likely(RTL_R16(IntrStatus) & RxAckBits)) work_done += rtl8139_rx(dev, tp, budget); - if (work_done < budget && napi_complete_done(napi, work_done)) { + if (work_done < budget) { unsigned long flags; spin_lock_irqsave(&tp->lock, flags); - RTL_W16_F(IntrMask, rtl8139_intr_mask); + if (napi_complete_done(napi, work_done)) + RTL_W16_F(IntrMask, rtl8139_intr_mask); spin_unlock_irqrestore(&tp->lock, flags); } spin_unlock(&tp->rx_lock); From 8ecb1a29e11e24c458ee4ee59447d0ddf8274589 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 18 Sep 2017 16:31:30 -0700 Subject: [PATCH 082/279] net: systemport: Fix 64-bit statistics dependency There are several problems with commit 10377ba7673d ("net: systemport: Support 64bit statistics", first one got fixed in 7095c973453e ("net: systemport: Fix 64-bit stats deadlock"). The second problem is that this specific code updates the stats64.tx_{packets,bytes} from ndo_get_stats64() and that is what we are returning to ethtool -S. If we are not running a tool that involves calling ndo_get_stats64(), then we won't get updated ethtool stats. The solution to this is to update the stats from both call sites, factoring that into a specific function, While at it, don't just check the sizeof() but also the type of the statistics in order to use the 64-bit stats seqlock. Fixes: 10377ba7673d ("net: systemport: Support 64bit statistics") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcmsysport.c | 52 +++++++++++++--------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index c3c53f6cd9e6..83eec9a8c275 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -432,6 +432,27 @@ static void bcm_sysport_update_mib_counters(struct bcm_sysport_priv *priv) netif_dbg(priv, hw, priv->netdev, "updated MIB counters\n"); } +static void bcm_sysport_update_tx_stats(struct bcm_sysport_priv *priv, + u64 *tx_bytes, u64 *tx_packets) +{ + struct bcm_sysport_tx_ring *ring; + u64 bytes = 0, packets = 0; + unsigned int start; + unsigned int q; + + for (q = 0; q < priv->netdev->num_tx_queues; q++) { + ring = &priv->tx_rings[q]; + do { + start = u64_stats_fetch_begin_irq(&priv->syncp); + bytes = ring->bytes; + packets = ring->packets; + } while (u64_stats_fetch_retry_irq(&priv->syncp, start)); + + *tx_bytes += bytes; + *tx_packets += packets; + } +} + static void bcm_sysport_get_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { @@ -439,11 +460,16 @@ static void bcm_sysport_get_stats(struct net_device *dev, struct bcm_sysport_stats64 *stats64 = &priv->stats64; struct u64_stats_sync *syncp = &priv->syncp; struct bcm_sysport_tx_ring *ring; + u64 tx_bytes = 0, tx_packets = 0; unsigned int start; int i, j; - if (netif_running(dev)) + if (netif_running(dev)) { bcm_sysport_update_mib_counters(priv); + bcm_sysport_update_tx_stats(priv, &tx_bytes, &tx_packets); + stats64->tx_bytes = tx_bytes; + stats64->tx_packets = tx_packets; + } for (i = 0, j = 0; i < BCM_SYSPORT_STATS_LEN; i++) { const struct bcm_sysport_stats *s; @@ -461,12 +487,13 @@ static void bcm_sysport_get_stats(struct net_device *dev, continue; p += s->stat_offset; - if (s->stat_sizeof == sizeof(u64)) + if (s->stat_sizeof == sizeof(u64) && + s->type == BCM_SYSPORT_STAT_NETDEV64) { do { start = u64_stats_fetch_begin_irq(syncp); data[i] = *(u64 *)p; } while (u64_stats_fetch_retry_irq(syncp, start)); - else + } else data[i] = *(u32 *)p; j++; } @@ -1716,27 +1743,12 @@ static void bcm_sysport_get_stats64(struct net_device *dev, { struct bcm_sysport_priv *priv = netdev_priv(dev); struct bcm_sysport_stats64 *stats64 = &priv->stats64; - struct bcm_sysport_tx_ring *ring; - u64 tx_packets = 0, tx_bytes = 0; unsigned int start; - unsigned int q; netdev_stats_to_stats64(stats, &dev->stats); - for (q = 0; q < dev->num_tx_queues; q++) { - ring = &priv->tx_rings[q]; - do { - start = u64_stats_fetch_begin_irq(&priv->syncp); - tx_bytes = ring->bytes; - tx_packets = ring->packets; - } while (u64_stats_fetch_retry_irq(&priv->syncp, start)); - - stats->tx_bytes += tx_bytes; - stats->tx_packets += tx_packets; - } - - stats64->tx_bytes = stats->tx_bytes; - stats64->tx_packets = stats->tx_packets; + bcm_sysport_update_tx_stats(priv, &stats->tx_bytes, + &stats->tx_packets); do { start = u64_stats_fetch_begin_irq(&priv->syncp); From c8b850241559d6bad7e640075e282a83a4ad7ead Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 18 Sep 2017 12:25:22 +0200 Subject: [PATCH 083/279] s390/scm_blk: consistently use blk_status_t as error type Fix these warnings found by sparse: drivers/s390/block/scm_blk.c:257:24: warning: incorrect type in assignment (different base types) drivers/s390/block/scm_blk.c:257:24: expected int [signed] drivers/s390/block/scm_blk.c:257:24: got restricted blk_status_t [usertype] error drivers/s390/block/scm_blk.c:420:33: warning: incorrect type in argument 2 (different base types) drivers/s390/block/scm_blk.c:420:33: expected restricted blk_status_t [usertype] error drivers/s390/block/scm_blk.c:420:33: got int [signed] Signed-off-by: Sebastian Ott Reported-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/block/scm_blk.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c index 2e7fd966c515..eb51893c74a4 100644 --- a/drivers/s390/block/scm_blk.c +++ b/drivers/s390/block/scm_blk.c @@ -249,7 +249,7 @@ static void scm_request_requeue(struct scm_request *scmrq) static void scm_request_finish(struct scm_request *scmrq) { struct scm_blk_dev *bdev = scmrq->bdev; - int *error; + blk_status_t *error; int i; for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++) { @@ -415,7 +415,7 @@ void scm_blk_irq(struct scm_device *scmdev, void *data, blk_status_t error) static void scm_blk_request_done(struct request *req) { - int *error = blk_mq_rq_to_pdu(req); + blk_status_t *error = blk_mq_rq_to_pdu(req); blk_mq_end_request(req, *error); } @@ -450,7 +450,7 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev) atomic_set(&bdev->queued_reqs, 0); bdev->tag_set.ops = &scm_mq_ops; - bdev->tag_set.cmd_size = sizeof(int); + bdev->tag_set.cmd_size = sizeof(blk_status_t); bdev->tag_set.nr_hw_queues = nr_requests; bdev->tag_set.queue_depth = nr_requests_per_io * nr_requests; bdev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; From 55fb7347579017bdb09550b4e8019447340b7004 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 14 Sep 2017 13:55:22 +0200 Subject: [PATCH 084/279] s390/cio: recover from bad paths In some situations we don't receive notification from firmware that a previously unusable channelpath is usable again. Schedule recovery for devices that return from path verification without using all potentially usable paths. The recovery thread will periodically trigger a path verification on the affected devices. Signed-off-by: Sebastian Ott Suggested-by: Peter Oberparleiter Reviewed-by: Peter Oberparleiter Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/device.c | 12 +++++++++--- drivers/s390/cio/device.h | 1 + drivers/s390/cio/device_fsm.c | 12 ++++++++++++ drivers/s390/cio/io_sch.h | 2 ++ 4 files changed, 24 insertions(+), 3 deletions(-) diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 489b583f263d..e5c32f4b5287 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -1225,10 +1225,16 @@ static int device_is_disconnected(struct ccw_device *cdev) static int recovery_check(struct device *dev, void *data) { struct ccw_device *cdev = to_ccwdev(dev); + struct subchannel *sch; int *redo = data; spin_lock_irq(cdev->ccwlock); switch (cdev->private->state) { + case DEV_STATE_ONLINE: + sch = to_subchannel(cdev->dev.parent); + if ((sch->schib.pmcw.pam & sch->opm) == sch->vpm) + break; + /* fall through */ case DEV_STATE_DISCONNECTED: CIO_MSG_EVENT(3, "recovery: trigger 0.%x.%04x\n", cdev->private->dev_id.ssid, @@ -1260,7 +1266,7 @@ static void recovery_work_func(struct work_struct *unused) } spin_unlock_irq(&recovery_lock); } else - CIO_MSG_EVENT(4, "recovery: end\n"); + CIO_MSG_EVENT(3, "recovery: end\n"); } static DECLARE_WORK(recovery_work, recovery_work_func); @@ -1274,11 +1280,11 @@ static void recovery_func(unsigned long data) schedule_work(&recovery_work); } -static void ccw_device_schedule_recovery(void) +void ccw_device_schedule_recovery(void) { unsigned long flags; - CIO_MSG_EVENT(4, "recovery: schedule\n"); + CIO_MSG_EVENT(3, "recovery: schedule\n"); spin_lock_irqsave(&recovery_lock, flags); if (!timer_pending(&recovery_timer) || (recovery_phase != 0)) { recovery_phase = 0; diff --git a/drivers/s390/cio/device.h b/drivers/s390/cio/device.h index ec497af99dd8..69cb70f080a5 100644 --- a/drivers/s390/cio/device.h +++ b/drivers/s390/cio/device.h @@ -134,6 +134,7 @@ void ccw_device_set_disconnected(struct ccw_device *cdev); void ccw_device_set_notoper(struct ccw_device *cdev); void ccw_device_set_timeout(struct ccw_device *, int); +void ccw_device_schedule_recovery(void); /* Channel measurement facility related */ void retry_set_schib(struct ccw_device *cdev); diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c index 12016e32e519..f98ea674c3d8 100644 --- a/drivers/s390/cio/device_fsm.c +++ b/drivers/s390/cio/device_fsm.c @@ -476,6 +476,17 @@ static void create_fake_irb(struct irb *irb, int type) } } +static void ccw_device_handle_broken_paths(struct ccw_device *cdev) +{ + struct subchannel *sch = to_subchannel(cdev->dev.parent); + u8 broken_paths = (sch->schib.pmcw.pam & sch->opm) ^ sch->vpm; + + if (broken_paths && (cdev->private->path_broken_mask != broken_paths)) + ccw_device_schedule_recovery(); + + cdev->private->path_broken_mask = broken_paths; +} + void ccw_device_verify_done(struct ccw_device *cdev, int err) { struct subchannel *sch; @@ -508,6 +519,7 @@ callback: memset(&cdev->private->irb, 0, sizeof(struct irb)); } ccw_device_report_path_events(cdev); + ccw_device_handle_broken_paths(cdev); break; case -ETIME: case -EUSERS: diff --git a/drivers/s390/cio/io_sch.h b/drivers/s390/cio/io_sch.h index 220f49145b2f..9a1b56b2df3e 100644 --- a/drivers/s390/cio/io_sch.h +++ b/drivers/s390/cio/io_sch.h @@ -131,6 +131,8 @@ struct ccw_device_private { not operable */ u8 path_gone_mask; /* mask of paths, that became unavailable */ u8 path_new_mask; /* mask of paths, that became available */ + u8 path_broken_mask; /* mask of paths, which were found to be + unusable */ struct { unsigned int fast:1; /* post with "channel end" */ unsigned int repall:1; /* report every interrupt status */ From 91c575b335766effa6103eba42a82aea560c365f Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Mon, 18 Sep 2017 16:10:35 +0200 Subject: [PATCH 085/279] s390/mm: make pmdp_invalidate() do invalidation only Commit 227be799c39a ("s390/mm: uninline pmdp_xxx functions from pgtable.h") inadvertently changed the behavior of pmdp_invalidate(), so that it now clears the pmd instead of just marking it as invalid. Fix this by restoring the original behavior. A possible impact of the misbehaving pmdp_invalidate() would be the MADV_DONTNEED races (see commits ced10803 and 58ceeb6b), although we should not have any negative impact on the related dirty/young flags, since those flags are not set by the hardware on s390. Fixes: 227be799c39a ("s390/mm: uninline pmdp_xxx functions from pgtable.h") Cc: # v4.6+ Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgtable.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index dce708e061ea..20e75a2ca93a 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1507,7 +1507,9 @@ static inline pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, static inline void pmdp_invalidate(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp) { - pmdp_xchg_direct(vma->vm_mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); + pmd_t pmd = __pmd(pmd_val(*pmdp) | _SEGMENT_ENTRY_INVALID); + + pmdp_xchg_direct(vma->vm_mm, addr, pmdp, pmd); } #define __HAVE_ARCH_PMDP_SET_WRPROTECT From ba385c0594e723d41790ecfb12c610e6f90c7785 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Mon, 18 Sep 2017 16:51:51 +0200 Subject: [PATCH 086/279] s390/mm: fix write access check in gup_huge_pmd() The check for the _SEGMENT_ENTRY_PROTECT bit in gup_huge_pmd() is the wrong way around. It must not be set for write==1, and not be checked for write==0. Fix this similar to how it was fixed for ptes long time ago in commit 25591b070336 ("[S390] fix get_user_pages_fast"). One impact of this bug would be unnecessarily using the gup slow path for write==0 on r/w mappings. A potentially more severe impact would be that gup_huge_pmd() will succeed for write==1 on r/o mappings. Cc: Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/mm/gup.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index 8ecc25e760fa..98ffe3ee9411 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -56,13 +56,12 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr, static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { - unsigned long mask, result; struct page *head, *page; + unsigned long mask; int refs; - result = write ? 0 : _SEGMENT_ENTRY_PROTECT; - mask = result | _SEGMENT_ENTRY_INVALID; - if ((pmd_val(pmd) & mask) != result) + mask = (write ? _SEGMENT_ENTRY_PROTECT : 0) | _SEGMENT_ENTRY_INVALID; + if ((pmd_val(pmd) & mask) != 0) return 0; VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT)); From 95e2a3b3ef177730019e3799917193595133b275 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=2E=20Sch=C3=B6nherr?= Date: Sat, 16 Sep 2017 22:12:24 +0200 Subject: [PATCH 087/279] Revert "KVM: Don't accept obviously wrong gsi values via KVM_IRQFD" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 36ae3c0a36b7456432fedce38ae2f7bd3e01a563. The commit broke compilation on !CONFIG_HAVE_KVM_IRQ_ROUTING. Also, there may be cases with CONFIG_HAVE_KVM_IRQ_ROUTING, where larger gsi values make sense. As the commit was meant as an early indicator to user space that something is wrong, reverting just restores the previous behavior where overly large values are ignored when encountered (without any direct feedback). Reported-by: Abdul Haleem Signed-off-by: Jan H. Schönherr Reviewed-by: David Hildenbrand Signed-off-by: Radim Krčmář --- virt/kvm/eventfd.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index c608ab495282..f2ac53ab8243 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -565,8 +565,6 @@ kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) { if (args->flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_RESAMPLE)) return -EINVAL; - if (args->gsi >= KVM_MAX_IRQ_ROUTES) - return -EINVAL; if (args->flags & KVM_IRQFD_FLAG_DEASSIGN) return kvm_irqfd_deassign(kvm, args); From a4aaeccc7a91e24cc17f72a7eb2f586f5c3d811d Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 10 Sep 2017 13:43:37 -0700 Subject: [PATCH 088/279] iommu: Add missing dependencies parisc:allmodconfig, xtensa:allmodconfig, and possibly others generate the following Kconfig warning. warning: (IPMMU_VMSA && ARM_SMMU && ARM_SMMU_V3 && QCOM_IOMMU) selects IOMMU_IO_PGTABLE_LPAE which has unmet direct dependencies (IOMMU_SUPPORT && HAS_DMA && (ARM || ARM64 || COMPILE_TEST && !GENERIC_ATOMIC64)) IOMMU_IO_PGTABLE_LPAE depends on (COMPILE_TEST && !GENERIC_ATOMIC64), so any configuration option selecting it needs to have the same dependencies. Signed-off-by: Guenter Roeck Signed-off-by: Joerg Roedel --- drivers/iommu/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 49bd2ab8c507..8530ef78925d 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -278,7 +278,7 @@ config EXYNOS_IOMMU_DEBUG config IPMMU_VMSA bool "Renesas VMSA-compatible IPMMU" depends on ARM || IOMMU_DMA - depends on ARCH_RENESAS || COMPILE_TEST + depends on ARCH_RENESAS || (COMPILE_TEST && !GENERIC_ATOMIC64) select IOMMU_API select IOMMU_IO_PGTABLE_LPAE select ARM_DMA_USE_IOMMU @@ -373,7 +373,7 @@ config MTK_IOMMU_V1 config QCOM_IOMMU # Note: iommu drivers cannot (yet?) be built as modules bool "Qualcomm IOMMU Support" - depends on ARCH_QCOM || COMPILE_TEST + depends on ARCH_QCOM || (COMPILE_TEST && !GENERIC_ATOMIC64) select IOMMU_API select IOMMU_IO_PGTABLE_LPAE select ARM_DMA_USE_IOMMU From 3bd71e18c5803c23014df962bdd74af1b34697fe Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 12 Sep 2017 22:10:21 +0200 Subject: [PATCH 089/279] iommu/vt-d: Fix harmless section mismatch warning Building with gcc-4.6 results in this warning due to dmar_table_print_dmar_entry being inlined as in newer compiler versions: WARNING: vmlinux.o(.text+0x5c8bee): Section mismatch in reference from the function dmar_walk_remapping_entries() to the function .init.text:dmar_table_print_dmar_entry() The function dmar_walk_remapping_entries() references the function __init dmar_table_print_dmar_entry(). This is often because dmar_walk_remapping_entries lacks a __init annotation or the annotation of dmar_table_print_dmar_entry is wrong. This removes the __init annotation to avoid the warning. On compilers that don't show the warning today, this should have no impact since the function gets inlined anyway. Signed-off-by: Arnd Bergmann Signed-off-by: Joerg Roedel --- drivers/iommu/dmar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index ca5ebaeafd6a..57c920c1372d 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -497,7 +497,7 @@ static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg) #define dmar_parse_one_rhsa dmar_res_noop #endif -static void __init +static void dmar_table_print_dmar_entry(struct acpi_dmar_header *header) { struct acpi_dmar_hardware_unit *drhd; From 5baf6bb0fd2388742a0846cc7bcacee6dec78235 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 14 Sep 2017 14:01:00 +0200 Subject: [PATCH 090/279] drm/exynos: Fix locking in the suspend/resume paths Commit 48a92916729b ("drm/exynos: use drm_for_each_connector_iter()") replaced unsafe drm_for_each_connector() with drm_for_each_connector_iter() and removed surrounding drm_modeset_lock calls. However, that lock was there not only to protect unsafe drm_for_each_connector(), but it was also required to be held by the dpms code which was called from the loop body. This patch restores those drm_modeset_lock calls to fix broken suspend and resume of Exynos DRM subsystem in v4.13 kernel. Fixes: 48a92916729b ("drm/exynos: use drm_for_each_connector_iter()") CC: stable@vger.kernel.org # v4.13 Signed-off-by: Marek Szyprowski Acked-by: Krzysztof Kozlowski Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_drv.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index b1f7299600f0..7f3cfc5dd320 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -174,6 +174,7 @@ static int exynos_drm_suspend(struct device *dev) if (pm_runtime_suspended(dev) || !drm_dev) return 0; + drm_modeset_lock_all(drm_dev); drm_connector_list_iter_begin(drm_dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) { int old_dpms = connector->dpms; @@ -185,6 +186,7 @@ static int exynos_drm_suspend(struct device *dev) connector->dpms = old_dpms; } drm_connector_list_iter_end(&conn_iter); + drm_modeset_unlock_all(drm_dev); return 0; } @@ -198,6 +200,7 @@ static int exynos_drm_resume(struct device *dev) if (pm_runtime_suspended(dev) || !drm_dev) return 0; + drm_modeset_lock_all(drm_dev); drm_connector_list_iter_begin(drm_dev, &conn_iter); drm_for_each_connector_iter(connector, &conn_iter) { if (connector->funcs->dpms) { @@ -208,6 +211,7 @@ static int exynos_drm_resume(struct device *dev) } } drm_connector_list_iter_end(&conn_iter); + drm_modeset_unlock_all(drm_dev); return 0; } From 6e8edf8a7d8d98e1734ff41e5c69439419319402 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 14 Sep 2017 14:01:01 +0200 Subject: [PATCH 091/279] drm/exynos: Fix suspend/resume support Commit 7d902c05b480 ("drm: Nuke drm_atomic_helper_connector_dpms") removed drm_atomic_helper_connector_dpms() helper saying that it was a dead code. It was however indirectly used by Exynos DRM driver for implementing suspend/resume support. To fix this regression (after that patch Exynos DRM suspend/resume functions became no-ops and hardware fails to suspend), this patch rewrites them with drm_atomic_helper_suspend/resume() helpers. Fixes: 7d902c05b480 ("drm: Nuke drm_atomic_helper_connector_dpms") Signed-off-by: Marek Szyprowski Acked-by: Krzysztof Kozlowski Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_drv.c | 40 +++++++---------------- drivers/gpu/drm/exynos/exynos_drm_drv.h | 1 + drivers/gpu/drm/exynos/exynos_drm_fbdev.c | 20 ++++++++++++ drivers/gpu/drm/exynos/exynos_drm_fbdev.h | 10 ++++++ 4 files changed, 43 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index 7f3cfc5dd320..e651a58c18cf 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -168,25 +168,19 @@ static struct drm_driver exynos_drm_driver = { static int exynos_drm_suspend(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); - struct drm_connector *connector; - struct drm_connector_list_iter conn_iter; + struct exynos_drm_private *private = drm_dev->dev_private; if (pm_runtime_suspended(dev) || !drm_dev) return 0; - drm_modeset_lock_all(drm_dev); - drm_connector_list_iter_begin(drm_dev, &conn_iter); - drm_for_each_connector_iter(connector, &conn_iter) { - int old_dpms = connector->dpms; - - if (connector->funcs->dpms) - connector->funcs->dpms(connector, DRM_MODE_DPMS_OFF); - - /* Set the old mode back to the connector for resume */ - connector->dpms = old_dpms; + drm_kms_helper_poll_disable(drm_dev); + exynos_drm_fbdev_suspend(drm_dev); + private->suspend_state = drm_atomic_helper_suspend(drm_dev); + if (IS_ERR(private->suspend_state)) { + exynos_drm_fbdev_resume(drm_dev); + drm_kms_helper_poll_enable(drm_dev); + return PTR_ERR(private->suspend_state); } - drm_connector_list_iter_end(&conn_iter); - drm_modeset_unlock_all(drm_dev); return 0; } @@ -194,24 +188,14 @@ static int exynos_drm_suspend(struct device *dev) static int exynos_drm_resume(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); - struct drm_connector *connector; - struct drm_connector_list_iter conn_iter; + struct exynos_drm_private *private = drm_dev->dev_private; if (pm_runtime_suspended(dev) || !drm_dev) return 0; - drm_modeset_lock_all(drm_dev); - drm_connector_list_iter_begin(drm_dev, &conn_iter); - drm_for_each_connector_iter(connector, &conn_iter) { - if (connector->funcs->dpms) { - int dpms = connector->dpms; - - connector->dpms = DRM_MODE_DPMS_OFF; - connector->funcs->dpms(connector, dpms); - } - } - drm_connector_list_iter_end(&conn_iter); - drm_modeset_unlock_all(drm_dev); + drm_atomic_helper_resume(drm_dev, private->suspend_state); + exynos_drm_fbdev_resume(drm_dev); + drm_kms_helper_poll_enable(drm_dev); return 0; } diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h index cf131c2aa23e..f8bae4cb4823 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.h +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h @@ -202,6 +202,7 @@ struct drm_exynos_file_private { */ struct exynos_drm_private { struct drm_fb_helper *fb_helper; + struct drm_atomic_state *suspend_state; struct device *dma_dev; void *mapping; diff --git a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c index c3a068409b48..dfb66ecf417b 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c @@ -18,6 +18,8 @@ #include #include +#include + #include "exynos_drm_drv.h" #include "exynos_drm_fb.h" #include "exynos_drm_fbdev.h" @@ -285,3 +287,21 @@ void exynos_drm_output_poll_changed(struct drm_device *dev) drm_fb_helper_hotplug_event(fb_helper); } + +void exynos_drm_fbdev_suspend(struct drm_device *dev) +{ + struct exynos_drm_private *private = dev->dev_private; + + console_lock(); + drm_fb_helper_set_suspend(private->fb_helper, 1); + console_unlock(); +} + +void exynos_drm_fbdev_resume(struct drm_device *dev) +{ + struct exynos_drm_private *private = dev->dev_private; + + console_lock(); + drm_fb_helper_set_suspend(private->fb_helper, 0); + console_unlock(); +} diff --git a/drivers/gpu/drm/exynos/exynos_drm_fbdev.h b/drivers/gpu/drm/exynos/exynos_drm_fbdev.h index 330eef87f718..645d1bb7f665 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fbdev.h +++ b/drivers/gpu/drm/exynos/exynos_drm_fbdev.h @@ -21,6 +21,8 @@ int exynos_drm_fbdev_init(struct drm_device *dev); void exynos_drm_fbdev_fini(struct drm_device *dev); void exynos_drm_fbdev_restore_mode(struct drm_device *dev); void exynos_drm_output_poll_changed(struct drm_device *dev); +void exynos_drm_fbdev_suspend(struct drm_device *drm); +void exynos_drm_fbdev_resume(struct drm_device *drm); #else @@ -39,6 +41,14 @@ static inline void exynos_drm_fbdev_restore_mode(struct drm_device *dev) #define exynos_drm_output_poll_changed (NULL) +static inline void exynos_drm_fbdev_suspend(struct drm_device *drm) +{ +} + +static inline void exynos_drm_fbdev_resume(struct drm_device *drm) +{ +} + #endif #endif From 9ac30ef6d8ec3533d0feec53e268c4fa32ea700c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 5 Sep 2017 10:19:55 +0200 Subject: [PATCH 092/279] drm: exynos: include linux/irq.h I ran into a build error on x86: drivers/gpu/drm/exynos/exynos5433_drm_decon.c: In function 'decon_conf_irq': drivers/gpu/drm/exynos/exynos5433_drm_decon.c:706:2: error: implicit declaration of function 'irq_set_status_flags'; did you mean 'dquot_state_flag'? [-Werror=implicit-function-declaration] irq_set_status_flags(irq, IRQ_NOAUTOEN); Adding the missing include fixes the error. Fixes: b37d53a0382c ("drm/exynos/decon5433: move TE handling to DECON") Signed-off-by: Arnd Bergmann Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c index 730b8d9db187..6be5b53c3b27 100644 --- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include From d6500149bc4fddc5a91cd1a0c31b38fa36bff3ee Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Mon, 18 Sep 2017 18:45:01 +0800 Subject: [PATCH 093/279] KVM: x86: Fix the NULL pointer parameter in check_cr_write() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Routine check_cr_write() will trigger emulator_get_cpuid()-> kvm_cpuid() to get maxphyaddr, and NULL is passed as values for ebx/ecx/edx. This is problematic because kvm_cpuid() will dereference these pointers. Fixes: d1cd3ce90044 ("KVM: MMU: check guest CR3 reserved bits based on its physical address width.") Reported-by: Jim Mattson Signed-off-by: Yu Zhang Reviewed-by: David Hildenbrand Reviewed-by: Jim Mattson Signed-off-by: Radim Krčmář --- arch/x86/kvm/emulate.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 16bf6655aa85..15f527b44aa7 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4102,10 +4102,12 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt) ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); if (efer & EFER_LMA) { u64 maxphyaddr; - u32 eax = 0x80000008; + u32 eax, ebx, ecx, edx; - if (ctxt->ops->get_cpuid(ctxt, &eax, NULL, NULL, - NULL, false)) + eax = 0x80000008; + ecx = 0; + if (ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, + &edx, false)) maxphyaddr = eax & 0xff; else maxphyaddr = 36; From dc91f2eb1a4021eb6705c15e474942f84ab9b211 Mon Sep 17 00:00:00 2001 From: Haozhong Zhang Date: Mon, 18 Sep 2017 09:56:49 +0800 Subject: [PATCH 094/279] KVM: VMX: do not change SN bit in vmx_update_pi_irte() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In kvm_vcpu_trigger_posted_interrupt() and pi_pre_block(), KVM assumes that PI notification events should not be suppressed when the target vCPU is not blocked. vmx_update_pi_irte() sets the SN field before changing an interrupt from posting to remapping, but it does not check the vCPU mode. Therefore, the change of SN field may break above the assumption. Besides, I don't see reasons to suppress notification events here, so remove the changes of SN field to avoid race condition. Signed-off-by: Haozhong Zhang Reported-by: "Ramamurthy, Venkatesh" Reported-by: Dan Williams Reviewed-by: Paolo Bonzini Fixes: 28b835d60fcc ("KVM: Update Posted-Interrupts Descriptor when vCPU is preempted") Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 06c0c6d0541e..7328c8c0ea3b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -11911,12 +11911,8 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq, if (set) ret = irq_set_vcpu_affinity(host_irq, &vcpu_info); - else { - /* suppress notification event before unposting */ - pi_set_sn(vcpu_to_pi_desc(vcpu)); + else ret = irq_set_vcpu_affinity(host_irq, NULL); - pi_clear_sn(vcpu_to_pi_desc(vcpu)); - } if (ret < 0) { printk(KERN_INFO "%s: failed to update PI IRTE\n", From 5753743fa5108b8f98bd61e40dc63f641b26c768 Mon Sep 17 00:00:00 2001 From: Haozhong Zhang Date: Mon, 18 Sep 2017 09:56:50 +0800 Subject: [PATCH 095/279] KVM: VMX: remove WARN_ON_ONCE in kvm_vcpu_trigger_posted_interrupt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit WARN_ON_ONCE(pi_test_sn(&vmx->pi_desc)) in kvm_vcpu_trigger_posted_interrupt() intends to detect the violation of invariant that VT-d PI notification event is not suppressed when vcpu is in the guest mode. Because the two checks for the target vcpu mode and the target suppress field cannot be performed atomically, the target vcpu mode may change in between. If that does happen, WARN_ON_ONCE() here may raise false alarms. As the previous patch fixed the real invariant breaker, remove this WARN_ON_ONCE() to avoid false alarms, and document the allowed cases instead. Signed-off-by: Haozhong Zhang Reported-by: "Ramamurthy, Venkatesh" Reported-by: Dan Williams Reviewed-by: Paolo Bonzini Fixes: 28b835d60fcc ("KVM: Update Posted-Interrupts Descriptor when vCPU is preempted") Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7328c8c0ea3b..0726ca7a1b02 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5077,21 +5077,30 @@ static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu, int pi_vec = nested ? POSTED_INTR_NESTED_VECTOR : POSTED_INTR_VECTOR; if (vcpu->mode == IN_GUEST_MODE) { - struct vcpu_vmx *vmx = to_vmx(vcpu); - /* - * Currently, we don't support urgent interrupt, - * all interrupts are recognized as non-urgent - * interrupt, so we cannot post interrupts when - * 'SN' is set. + * The vector of interrupt to be delivered to vcpu had + * been set in PIR before this function. * - * If the vcpu is in guest mode, it means it is - * running instead of being scheduled out and - * waiting in the run queue, and that's the only - * case when 'SN' is set currently, warning if - * 'SN' is set. + * Following cases will be reached in this block, and + * we always send a notification event in all cases as + * explained below. + * + * Case 1: vcpu keeps in non-root mode. Sending a + * notification event posts the interrupt to vcpu. + * + * Case 2: vcpu exits to root mode and is still + * runnable. PIR will be synced to vIRR before the + * next vcpu entry. Sending a notification event in + * this case has no effect, as vcpu is not in root + * mode. + * + * Case 3: vcpu exits to root mode and is blocked. + * vcpu_block() has already synced PIR to vIRR and + * never blocks vcpu if vIRR is not cleared. Therefore, + * a blocked vcpu here does not wait for any requested + * interrupts in PIR, and sending a notification event + * which has no effect is safe here. */ - WARN_ON_ONCE(pi_test_sn(&vmx->pi_desc)); apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec); return true; From 0555ac4333d7da7cff83d5b06bd3679a3e1ef584 Mon Sep 17 00:00:00 2001 From: Tycho Andersen Date: Mon, 18 Sep 2017 16:35:32 -0600 Subject: [PATCH 096/279] xen, arm64: drop dummy lookup_address() This is unused, and conflicts with the definition that we'll add for XPFO. Signed-off-by: Tycho Andersen Reviewed-by: Julien Grall CC: Boris Ostrovsky CC: Juergen Gross CC: Stefano Stabellini Signed-off-by: Boris Ostrovsky --- include/xen/arm/page.h | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/include/xen/arm/page.h b/include/xen/arm/page.h index 415dbc6e43fd..6adc2a955340 100644 --- a/include/xen/arm/page.h +++ b/include/xen/arm/page.h @@ -84,16 +84,6 @@ static inline xmaddr_t arbitrary_virt_to_machine(void *vaddr) BUG(); } -/* TODO: this shouldn't be here but it is because the frontend drivers - * are using it (its rolled in headers) even though we won't hit the code path. - * So for right now just punt with this. - */ -static inline pte_t *lookup_address(unsigned long address, unsigned int *level) -{ - BUG(); - return NULL; -} - extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, struct gnttab_map_grant_ref *kmap_ops, struct page **pages, unsigned int count); From 986a5f70173626eea9863fee6d6e029f0f2bc361 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 11 Sep 2017 14:34:34 +0200 Subject: [PATCH 097/279] iommu/qcom: Depend on HAS_DMA to fix compile error If NO_DMA=y: warning: (IPMMU_VMSA && ARM_SMMU && ARM_SMMU_V3 && QCOM_IOMMU) selects IOMMU_IO_PGTABLE_LPAE which has unmet direct dependencies (IOMMU_SUPPORT && HAS_DMA && (ARM || ARM64 || COMPILE_TEST && !GENERIC_ATOMIC64)) and drivers/iommu/io-pgtable-arm.o: In function `__arm_lpae_sync_pte': io-pgtable-arm.c:(.text+0x206): undefined reference to `bad_dma_ops' drivers/iommu/io-pgtable-arm.o: In function `__arm_lpae_free_pages': io-pgtable-arm.c:(.text+0x6a6): undefined reference to `bad_dma_ops' drivers/iommu/io-pgtable-arm.o: In function `__arm_lpae_alloc_pages': io-pgtable-arm.c:(.text+0x812): undefined reference to `bad_dma_ops' io-pgtable-arm.c:(.text+0x81c): undefined reference to `bad_dma_ops' io-pgtable-arm.c:(.text+0x862): undefined reference to `bad_dma_ops' drivers/iommu/io-pgtable-arm.o: In function `arm_lpae_run_tests': io-pgtable-arm.c:(.init.text+0x86): undefined reference to `alloc_io_pgtable_ops' io-pgtable-arm.c:(.init.text+0x47c): undefined reference to `free_io_pgtable_ops' drivers/iommu/qcom_iommu.o: In function `qcom_iommu_init_domain': qcom_iommu.c:(.text+0x1ce): undefined reference to `alloc_io_pgtable_ops' drivers/iommu/qcom_iommu.o: In function `qcom_iommu_domain_free': qcom_iommu.c:(.text+0x754): undefined reference to `free_io_pgtable_ops' QCOM_IOMMU selects IOMMU_IO_PGTABLE_LPAE, which bypasses its dependency on HAS_DMA. Make QCOM_IOMMU depend on HAS_DMA to fix this. Fixes: 0ae349a0f33fb040 ("iommu/qcom: Add qcom_iommu") Signed-off-by: Geert Uytterhoeven Signed-off-by: Joerg Roedel --- drivers/iommu/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 8530ef78925d..f3a21343e636 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -374,6 +374,7 @@ config QCOM_IOMMU # Note: iommu drivers cannot (yet?) be built as modules bool "Qualcomm IOMMU Support" depends on ARCH_QCOM || (COMPILE_TEST && !GENERIC_ATOMIC64) + depends on HAS_DMA select IOMMU_API select IOMMU_IO_PGTABLE_LPAE select ARM_DMA_USE_IOMMU From 8dd33bcb7050dd6f8c1432732f930932c9d3a33e Mon Sep 17 00:00:00 2001 From: Bo Yan Date: Mon, 18 Sep 2017 10:03:35 -0700 Subject: [PATCH 098/279] tracing: Erase irqsoff trace with empty write One convenient way to erase trace is "echo > trace". However, this is currently broken if the current tracer is irqsoff tracer. This is because irqsoff tracer use max_buffer as the default trace buffer. Set the max_buffer as the one to be cleared when it's the trace buffer currently in use. Link: http://lkml.kernel.org/r/1505754215-29411-1-git-send-email-byan@nvidia.com Cc: Cc: stable@vger.kernel.org Fixes: 4acd4d00f ("tracing: give easy way to clear trace buffer") Signed-off-by: Bo Yan Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5360b7aec57a..a7fb136da891 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4020,11 +4020,17 @@ static int tracing_open(struct inode *inode, struct file *file) /* If this file was open for write, then erase contents */ if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { int cpu = tracing_get_cpu(inode); + struct trace_buffer *trace_buf = &tr->trace_buffer; + +#ifdef CONFIG_TRACER_MAX_TRACE + if (tr->current_trace->print_max) + trace_buf = &tr->max_buffer; +#endif if (cpu == RING_BUFFER_ALL_CPUS) - tracing_reset_online_cpus(&tr->trace_buffer); + tracing_reset_online_cpus(trace_buf); else - tracing_reset(&tr->trace_buffer, cpu); + tracing_reset(trace_buf, cpu); } if (file->f_mode & FMODE_READ) { From c7b3ae0bd2ca658c7a71c49901d08c590294fac9 Mon Sep 17 00:00:00 2001 From: "Ziqian SUN (Zamir)" Date: Mon, 11 Sep 2017 14:26:35 +0800 Subject: [PATCH 099/279] tracing: Ignore mmiotrace from kernel commandline The mmiotrace tracer cannot be enabled with ftrace=mmiotrace in kernel commandline. With this patch, noboot is added to the tracer struct, and when system boot with a tracer that has noboot=true, it will print out a warning message and continue booting. Link: http://lkml.kernel.org/r/1505111195-31942-1-git-send-email-zsun@redhat.com Signed-off-by: Ziqian SUN (Zamir) Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 7 +++++++ kernel/trace/trace.h | 2 ++ kernel/trace/trace_mmiotrace.c | 1 + 3 files changed, 10 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a7fb136da891..d3ca35f38803 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5364,6 +5364,13 @@ static int tracing_set_tracer(struct trace_array *tr, const char *buf) if (t == tr->current_trace) goto out; + /* Some tracers won't work on kernel command line */ + if (system_state < SYSTEM_RUNNING && t->noboot) { + pr_warn("Tracer '%s' is not allowed on command line, ignored\n", + t->name); + goto out; + } + /* Some tracers are only allowed for the top level buffer */ if (!trace_ok_for_array(t, tr)) { ret = -EINVAL; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index fb5d54d0d1b3..652c682707cd 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -444,6 +444,8 @@ struct tracer { #ifdef CONFIG_TRACER_MAX_TRACE bool use_max_tr; #endif + /* True if tracer cannot be enabled in kernel param */ + bool noboot; }; diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index cd7480d0a201..dca78fc48439 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -282,6 +282,7 @@ static struct tracer mmio_tracer __read_mostly = .close = mmio_close, .read = mmio_read, .print_line = mmio_print_line, + .noboot = true, }; __init static int init_mmio_trace(void) From 3993491bf27117782bee05debc6a6afa51d61760 Mon Sep 17 00:00:00 2001 From: Ariel Elior Date: Tue, 19 Sep 2017 12:54:34 +0300 Subject: [PATCH 100/279] MAINTAINERS: Remove Yuval Mintz from maintainers list Remove Yuval from maintaining the bnx2x & qed* modules as he is no longer working for the company. Thanks Yuval for your huge contributions and tireless efforts over the many years and various companies. Ariel Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- MAINTAINERS | 2 -- 1 file changed, 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2281af4b41b6..955f034fd523 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2853,7 +2853,6 @@ S: Supported F: drivers/scsi/bnx2i/ BROADCOM BNX2X 10 GIGABIT ETHERNET DRIVER -M: Yuval Mintz M: Ariel Elior M: everest-linux-l2@cavium.com L: netdev@vger.kernel.org @@ -11047,7 +11046,6 @@ S: Supported F: drivers/scsi/qedi/ QLOGIC QL4xxx ETHERNET DRIVER -M: Yuval Mintz M: Ariel Elior M: everest-linux-l2@cavium.com L: netdev@vger.kernel.org From 29a0cfbf91ba997591535a4f7246835ce8328141 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 18 Sep 2017 12:21:37 +0200 Subject: [PATCH 101/279] libceph: don't allow bidirectional swap of pg-upmap-items This reverts most of commit f53b7665c8ce ("libceph: upmap semantic changes"). We need to prevent duplicates in the final result. For example, we can currently take [1,2,3] and apply [(1,2)] and get [2,2,3] or [1,2,3] and apply [(3,2)] and get [1,2,2] The rest of the system is not prepared to handle duplicates in the result set like this. The reverted piece was intended to allow [1,2,3] and [(1,2),(2,1)] to get [2,1,3] to reorder primaries. First, this bidirectional swap is hard to implement in a way that also prevents dups. For example, [1,2,3] and [(1,4),(2,3),(3,4)] would give [4,3,4] but would we just drop the last step we'd have [4,3,3] which is also invalid, etc. Simpler to just not handle bidirectional swaps. In practice, they are not needed: if you just want to choose a different primary then use primary_affinity, or pg_upmap (not pg_upmap_items). Cc: stable@vger.kernel.org # 4.13 Link: http://tracker.ceph.com/issues/21410 Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- net/ceph/osdmap.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index f358d0bfa76b..79d14d70b7ea 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -2445,19 +2445,34 @@ static void apply_upmap(struct ceph_osdmap *osdmap, pg = lookup_pg_mapping(&osdmap->pg_upmap_items, pgid); if (pg) { - for (i = 0; i < raw->size; i++) { - for (j = 0; j < pg->pg_upmap_items.len; j++) { - int from = pg->pg_upmap_items.from_to[j][0]; - int to = pg->pg_upmap_items.from_to[j][1]; + /* + * Note: this approach does not allow a bidirectional swap, + * e.g., [[1,2],[2,1]] applied to [0,1,2] -> [0,2,1]. + */ + for (i = 0; i < pg->pg_upmap_items.len; i++) { + int from = pg->pg_upmap_items.from_to[i][0]; + int to = pg->pg_upmap_items.from_to[i][1]; + int pos = -1; + bool exists = false; - if (from == raw->osds[i]) { - if (!(to != CRUSH_ITEM_NONE && - to < osdmap->max_osd && - osdmap->osd_weight[to] == 0)) - raw->osds[i] = to; + /* make sure replacement doesn't already appear */ + for (j = 0; j < raw->size; j++) { + int osd = raw->osds[j]; + + if (osd == to) { + exists = true; break; } + /* ignore mapping if target is marked out */ + if (osd == from && pos < 0 && + !(to != CRUSH_ITEM_NONE && + to < osdmap->max_osd && + osdmap->osd_weight[to] == 0)) { + pos = j; + } } + if (!exists && pos >= 0) + raw->osds[pos] = to; } } } From 717e6f2893eb35ce6728c3cacdc297b78d371b31 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 11 Sep 2017 12:10:08 +0800 Subject: [PATCH 102/279] ceph: avoid panic in create_session_open_msg() if utsname() returns NULL utsname() can return NULL while process is exiting. Kernel releases file locks during process exits. We send request to mds when releasing file lock. So it's possible that we open mds session while process is exiting. utsname() is called in create_session_open_msg(). Link: http://tracker.ceph.com/issues/21275 Signed-off-by: "Yan, Zheng" Reviewed-by: Jeff Layton [idryomov@gmail.com: drop utsname.h include from mds_client.c] Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 7 ++++--- fs/ceph/mds_client.h | 3 +++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 9dd6b836ac9e..84edfc60d87a 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -7,7 +7,6 @@ #include #include #include -#include #include #include "super.h" @@ -884,8 +883,8 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 void *p; const char* metadata[][2] = { - {"hostname", utsname()->nodename}, - {"kernel_version", utsname()->release}, + {"hostname", mdsc->nodename}, + {"kernel_version", init_utsname()->release}, {"entity_id", opt->name ? : ""}, {"root", fsopt->server_path ? : "/"}, {NULL, NULL} @@ -3539,6 +3538,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) init_rwsem(&mdsc->pool_perm_rwsem); mdsc->pool_perm_tree = RB_ROOT; + strncpy(mdsc->nodename, utsname()->nodename, + sizeof(mdsc->nodename) - 1); return 0; } diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index db57ae98ed34..636d6b2ec49c 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -368,6 +369,8 @@ struct ceph_mds_client { struct rw_semaphore pool_perm_rwsem; struct rb_root pool_perm_tree; + + char nodename[__NEW_UTS_LEN + 1]; }; extern const char *ceph_mds_op_name(int op); From 19a8d6b7604df85402deecae01d7861cb1d40c89 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Tue, 19 Sep 2017 15:50:42 +0100 Subject: [PATCH 103/279] MIPS: PCI: Move map_irq() hooks out of initdata 04c81c7293df ("MIPS: PCI: Replace pci_fixup_irqs() call with host bridge IRQ mapping hooks") moved the PCI IRQ fixup to the new host bridge map/swizzle_irq() hooks mechanism. Those hooks can also be called after boot, when all the __init/__initdata/__initconst sections have been freed. Therefore, functions called by them (and the data they refer to) must not be marked as __init/__initdata/__initconst lest compilation trigger section mismatch warnings. Fix all the board files map_irq() hooks by simply removing the respective __init/__initdata/__initconst section markers and by adding another persistent hook IRQ map for the txx9 board files. Fixes: 04c81c7293df ("MIPS: PCI: Replace pci_fixup_irqs() call with host bridge IRQ mapping hooks") Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Reviewed-by: Arnd Bergmann Cc: Ralf Baechle Cc: Steve French --- arch/mips/ath79/pci.c | 12 ++++++------ arch/mips/pci/fixup-capcella.c | 4 ++-- arch/mips/pci/fixup-cobalt.c | 8 ++++---- arch/mips/pci/fixup-emma2rh.c | 4 ++-- arch/mips/pci/fixup-fuloong2e.c | 2 +- arch/mips/pci/fixup-ip32.c | 4 ++-- arch/mips/pci/fixup-jmr3927.c | 2 +- arch/mips/pci/fixup-lantiq.c | 2 +- arch/mips/pci/fixup-lemote2f.c | 4 ++-- arch/mips/pci/fixup-loongson3.c | 2 +- arch/mips/pci/fixup-malta.c | 4 ++-- arch/mips/pci/fixup-mpc30x.c | 6 +++--- arch/mips/pci/fixup-pmcmsp.c | 8 ++++---- arch/mips/pci/fixup-rbtx4927.c | 2 +- arch/mips/pci/fixup-rbtx4938.c | 2 +- arch/mips/pci/fixup-sni.c | 12 ++++++------ arch/mips/pci/fixup-tb0219.c | 2 +- arch/mips/pci/fixup-tb0226.c | 2 +- arch/mips/pci/fixup-tb0287.c | 2 +- arch/mips/pci/pci-alchemy.c | 2 +- arch/mips/pci/pci-bcm47xx.c | 2 +- arch/mips/pci/pci-lasat.c | 2 +- arch/mips/pci/pci-mt7620.c | 2 +- arch/mips/pci/pci-octeon.c | 5 ++--- arch/mips/pci/pci-rt2880.c | 2 +- arch/mips/pci/pci-rt3883.c | 2 +- arch/mips/pci/pci-tx4938.c | 2 +- arch/mips/pci/pci-tx4939.c | 4 ++-- arch/mips/pci/pci-xlp.c | 2 +- arch/mips/pci/pci-xlr.c | 2 +- arch/mips/pci/pcie-octeon.c | 3 +-- arch/mips/txx9/generic/pci.c | 8 ++++++-- 32 files changed, 62 insertions(+), 60 deletions(-) diff --git a/arch/mips/ath79/pci.c b/arch/mips/ath79/pci.c index 730c0b03060d..b816cb4a25ff 100644 --- a/arch/mips/ath79/pci.c +++ b/arch/mips/ath79/pci.c @@ -22,10 +22,10 @@ #include "pci.h" static int (*ath79_pci_plat_dev_init)(struct pci_dev *dev); -static const struct ath79_pci_irq *ath79_pci_irq_map __initdata; -static unsigned ath79_pci_nr_irqs __initdata; +static const struct ath79_pci_irq *ath79_pci_irq_map; +static unsigned ath79_pci_nr_irqs; -static const struct ath79_pci_irq ar71xx_pci_irq_map[] __initconst = { +static const struct ath79_pci_irq ar71xx_pci_irq_map[] = { { .slot = 17, .pin = 1, @@ -41,7 +41,7 @@ static const struct ath79_pci_irq ar71xx_pci_irq_map[] __initconst = { } }; -static const struct ath79_pci_irq ar724x_pci_irq_map[] __initconst = { +static const struct ath79_pci_irq ar724x_pci_irq_map[] = { { .slot = 0, .pin = 1, @@ -49,7 +49,7 @@ static const struct ath79_pci_irq ar724x_pci_irq_map[] __initconst = { } }; -static const struct ath79_pci_irq qca955x_pci_irq_map[] __initconst = { +static const struct ath79_pci_irq qca955x_pci_irq_map[] = { { .bus = 0, .slot = 0, @@ -64,7 +64,7 @@ static const struct ath79_pci_irq qca955x_pci_irq_map[] __initconst = { }, }; -int __init pcibios_map_irq(const struct pci_dev *dev, uint8_t slot, uint8_t pin) +int pcibios_map_irq(const struct pci_dev *dev, uint8_t slot, uint8_t pin) { int irq = -1; int i; diff --git a/arch/mips/pci/fixup-capcella.c b/arch/mips/pci/fixup-capcella.c index 1c02f5737367..b4c263f16b15 100644 --- a/arch/mips/pci/fixup-capcella.c +++ b/arch/mips/pci/fixup-capcella.c @@ -32,13 +32,13 @@ #define INTC PC104PLUS_INTC_IRQ #define INTD PC104PLUS_INTD_IRQ -static char irq_tab_capcella[][5] __initdata = { +static char irq_tab_capcella[][5] = { [11] = { -1, INT1, INT1, INT1, INT1 }, [12] = { -1, INT2, INT2, INT2, INT2 }, [14] = { -1, INTA, INTB, INTC, INTD } }; -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { return irq_tab_capcella[slot][pin]; } diff --git a/arch/mips/pci/fixup-cobalt.c b/arch/mips/pci/fixup-cobalt.c index b3ab59318d91..44be65c3e6bb 100644 --- a/arch/mips/pci/fixup-cobalt.c +++ b/arch/mips/pci/fixup-cobalt.c @@ -147,7 +147,7 @@ static void qube_raq_via_board_id_fixup(struct pci_dev *dev) DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_0, qube_raq_via_board_id_fixup); -static char irq_tab_qube1[] __initdata = { +static char irq_tab_qube1[] = { [COBALT_PCICONF_CPU] = 0, [COBALT_PCICONF_ETH0] = QUBE1_ETH0_IRQ, [COBALT_PCICONF_RAQSCSI] = SCSI_IRQ, @@ -156,7 +156,7 @@ static char irq_tab_qube1[] __initdata = { [COBALT_PCICONF_ETH1] = 0 }; -static char irq_tab_cobalt[] __initdata = { +static char irq_tab_cobalt[] = { [COBALT_PCICONF_CPU] = 0, [COBALT_PCICONF_ETH0] = ETH0_IRQ, [COBALT_PCICONF_RAQSCSI] = SCSI_IRQ, @@ -165,7 +165,7 @@ static char irq_tab_cobalt[] __initdata = { [COBALT_PCICONF_ETH1] = ETH1_IRQ }; -static char irq_tab_raq2[] __initdata = { +static char irq_tab_raq2[] = { [COBALT_PCICONF_CPU] = 0, [COBALT_PCICONF_ETH0] = ETH0_IRQ, [COBALT_PCICONF_RAQSCSI] = RAQ2_SCSI_IRQ, @@ -174,7 +174,7 @@ static char irq_tab_raq2[] __initdata = { [COBALT_PCICONF_ETH1] = ETH1_IRQ }; -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { if (cobalt_board_id <= COBALT_BRD_ID_QUBE1) return irq_tab_qube1[slot]; diff --git a/arch/mips/pci/fixup-emma2rh.c b/arch/mips/pci/fixup-emma2rh.c index 19caf775c206..c31cb6af1cd0 100644 --- a/arch/mips/pci/fixup-emma2rh.c +++ b/arch/mips/pci/fixup-emma2rh.c @@ -43,7 +43,7 @@ */ #define MAX_SLOT_NUM 10 -static unsigned char irq_map[][5] __initdata = { +static unsigned char irq_map[][5] = { [3] = {0, MARKEINS_PCI_IRQ_INTB, MARKEINS_PCI_IRQ_INTC, MARKEINS_PCI_IRQ_INTD, 0,}, [4] = {0, MARKEINS_PCI_IRQ_INTA, 0, 0, 0,}, @@ -85,7 +85,7 @@ static void emma2rh_pci_host_fixup(struct pci_dev *dev) DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_EMMA2RH, emma2rh_pci_host_fixup); -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { return irq_map[slot][pin]; } diff --git a/arch/mips/pci/fixup-fuloong2e.c b/arch/mips/pci/fixup-fuloong2e.c index 50da773faede..b47c2771dc99 100644 --- a/arch/mips/pci/fixup-fuloong2e.c +++ b/arch/mips/pci/fixup-fuloong2e.c @@ -19,7 +19,7 @@ /* South bridge slot number is set by the pci probe process */ static u8 sb_slot = 5; -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { int irq = 0; diff --git a/arch/mips/pci/fixup-ip32.c b/arch/mips/pci/fixup-ip32.c index 133685e215ee..c6ec18a07e63 100644 --- a/arch/mips/pci/fixup-ip32.c +++ b/arch/mips/pci/fixup-ip32.c @@ -21,7 +21,7 @@ #define INTB MACEPCI_SHARED0_IRQ #define INTC MACEPCI_SHARED1_IRQ #define INTD MACEPCI_SHARED2_IRQ -static char irq_tab_mace[][5] __initdata = { +static char irq_tab_mace[][5] = { /* Dummy INT#A INT#B INT#C INT#D */ {0, 0, 0, 0, 0}, /* This is placeholder row - never used */ {0, SCSI0, SCSI0, SCSI0, SCSI0}, @@ -39,7 +39,7 @@ static char irq_tab_mace[][5] __initdata = { * irqs. I suppose a device without a pin A will thank us for doing it * right if there exists such a broken piece of crap. */ -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { return irq_tab_mace[slot][pin]; } diff --git a/arch/mips/pci/fixup-jmr3927.c b/arch/mips/pci/fixup-jmr3927.c index 0f1069527cba..d3102eeea898 100644 --- a/arch/mips/pci/fixup-jmr3927.c +++ b/arch/mips/pci/fixup-jmr3927.c @@ -31,7 +31,7 @@ #include #include -int __init jmr3927_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int jmr3927_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { unsigned char irq = pin; diff --git a/arch/mips/pci/fixup-lantiq.c b/arch/mips/pci/fixup-lantiq.c index 2b5427d3f35c..81530a13b349 100644 --- a/arch/mips/pci/fixup-lantiq.c +++ b/arch/mips/pci/fixup-lantiq.c @@ -23,7 +23,7 @@ int pcibios_plat_dev_init(struct pci_dev *dev) return 0; } -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { return of_irq_parse_and_map_pci(dev, slot, pin); } diff --git a/arch/mips/pci/fixup-lemote2f.c b/arch/mips/pci/fixup-lemote2f.c index 95ab9a1bd010..20cdfdc08938 100644 --- a/arch/mips/pci/fixup-lemote2f.c +++ b/arch/mips/pci/fixup-lemote2f.c @@ -30,7 +30,7 @@ #define PCID 7 /* all the pci device has the PCIA pin, check the datasheet. */ -static char irq_tab[][5] __initdata = { +static char irq_tab[][5] = { /* INTA INTB INTC INTD */ {0, 0, 0, 0, 0}, /* 11: Unused */ {0, 0, 0, 0, 0}, /* 12: Unused */ @@ -51,7 +51,7 @@ static char irq_tab[][5] __initdata = { {0, 0, 0, 0, 0}, /* 27: Unused */ }; -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { int virq; diff --git a/arch/mips/pci/fixup-loongson3.c b/arch/mips/pci/fixup-loongson3.c index 2b6d5e196f99..8a741c2c6685 100644 --- a/arch/mips/pci/fixup-loongson3.c +++ b/arch/mips/pci/fixup-loongson3.c @@ -32,7 +32,7 @@ static void print_fixup_info(const struct pci_dev *pdev) pdev->vendor, pdev->device, pdev->irq); } -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { print_fixup_info(dev); return dev->irq; diff --git a/arch/mips/pci/fixup-malta.c b/arch/mips/pci/fixup-malta.c index 40e920c653cc..3ec85331795e 100644 --- a/arch/mips/pci/fixup-malta.c +++ b/arch/mips/pci/fixup-malta.c @@ -12,7 +12,7 @@ static char pci_irq[5] = { }; -static char irq_tab[][5] __initdata = { +static char irq_tab[][5] = { /* INTA INTB INTC INTD */ {0, 0, 0, 0, 0 }, /* 0: GT64120 PCI bridge */ {0, 0, 0, 0, 0 }, /* 1: Unused */ @@ -38,7 +38,7 @@ static char irq_tab[][5] __initdata = { {0, PCID, PCIA, PCIB, PCIC } /* 21: PCI Slot 4 */ }; -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { int virq; virq = irq_tab[slot][pin]; diff --git a/arch/mips/pci/fixup-mpc30x.c b/arch/mips/pci/fixup-mpc30x.c index 8e4f8288eca2..66eaf456bc89 100644 --- a/arch/mips/pci/fixup-mpc30x.c +++ b/arch/mips/pci/fixup-mpc30x.c @@ -22,19 +22,19 @@ #include -static const int internal_func_irqs[] __initconst = { +static const int internal_func_irqs[] = { VRC4173_CASCADE_IRQ, VRC4173_AC97_IRQ, VRC4173_USB_IRQ, }; -static const int irq_tab_mpc30x[] __initconst = { +static const int irq_tab_mpc30x[] = { [12] = VRC4173_PCMCIA1_IRQ, [13] = VRC4173_PCMCIA2_IRQ, [29] = MQ200_IRQ, }; -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { if (slot == 30) return internal_func_irqs[PCI_FUNC(dev->devfn)]; diff --git a/arch/mips/pci/fixup-pmcmsp.c b/arch/mips/pci/fixup-pmcmsp.c index fab405c21c2f..4ad2ef02087b 100644 --- a/arch/mips/pci/fixup-pmcmsp.c +++ b/arch/mips/pci/fixup-pmcmsp.c @@ -47,7 +47,7 @@ #if defined(CONFIG_PMC_MSP7120_GW) /* Garibaldi Board IRQ wiring to PCI slots */ -static char irq_tab[][5] __initdata = { +static char irq_tab[][5] = { /* INTA INTB INTC INTD */ {0, 0, 0, 0, 0 }, /* (AD[0]): Unused */ {0, 0, 0, 0, 0 }, /* (AD[1]): Unused */ @@ -86,7 +86,7 @@ static char irq_tab[][5] __initdata = { #elif defined(CONFIG_PMC_MSP7120_EVAL) /* MSP7120 Eval Board IRQ wiring to PCI slots */ -static char irq_tab[][5] __initdata = { +static char irq_tab[][5] = { /* INTA INTB INTC INTD */ {0, 0, 0, 0, 0 }, /* (AD[0]): Unused */ {0, 0, 0, 0, 0 }, /* (AD[1]): Unused */ @@ -125,7 +125,7 @@ static char irq_tab[][5] __initdata = { #else /* Unknown board -- don't assign any IRQs */ -static char irq_tab[][5] __initdata = { +static char irq_tab[][5] = { /* INTA INTB INTC INTD */ {0, 0, 0, 0, 0 }, /* (AD[0]): Unused */ {0, 0, 0, 0, 0 }, /* (AD[1]): Unused */ @@ -202,7 +202,7 @@ int pcibios_plat_dev_init(struct pci_dev *dev) * RETURNS: IRQ number * ****************************************************************************/ -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { #if !defined(CONFIG_PMC_MSP7120_GW) && !defined(CONFIG_PMC_MSP7120_EVAL) printk(KERN_WARNING "PCI: unknown board, no PCI IRQs assigned.\n"); diff --git a/arch/mips/pci/fixup-rbtx4927.c b/arch/mips/pci/fixup-rbtx4927.c index 321db265829c..d6aaed1d6be9 100644 --- a/arch/mips/pci/fixup-rbtx4927.c +++ b/arch/mips/pci/fixup-rbtx4927.c @@ -36,7 +36,7 @@ #include #include -int __init rbtx4927_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int rbtx4927_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { unsigned char irq = pin; diff --git a/arch/mips/pci/fixup-rbtx4938.c b/arch/mips/pci/fixup-rbtx4938.c index a80579af609b..ff22a22db73e 100644 --- a/arch/mips/pci/fixup-rbtx4938.c +++ b/arch/mips/pci/fixup-rbtx4938.c @@ -13,7 +13,7 @@ #include #include -int __init rbtx4938_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int rbtx4938_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { int irq = tx4938_pcic1_map_irq(dev, slot); diff --git a/arch/mips/pci/fixup-sni.c b/arch/mips/pci/fixup-sni.c index f67ebeeb4200..adb9a58641e8 100644 --- a/arch/mips/pci/fixup-sni.c +++ b/arch/mips/pci/fixup-sni.c @@ -40,7 +40,7 @@ * seem to be a documentation error. At least on my RM200C the Cirrus * Logic CL-GD5434 VGA is device 3. */ -static char irq_tab_rm200[8][5] __initdata = { +static char irq_tab_rm200[8][5] = { /* INTA INTB INTC INTD */ { 0, 0, 0, 0, 0 }, /* EISA bridge */ { SCSI, SCSI, SCSI, SCSI, SCSI }, /* SCSI */ @@ -57,7 +57,7 @@ static char irq_tab_rm200[8][5] __initdata = { * * The VGA card is optional for RM300 systems. */ -static char irq_tab_rm300d[8][5] __initdata = { +static char irq_tab_rm300d[8][5] = { /* INTA INTB INTC INTD */ { 0, 0, 0, 0, 0 }, /* EISA bridge */ { SCSI, SCSI, SCSI, SCSI, SCSI }, /* SCSI */ @@ -69,7 +69,7 @@ static char irq_tab_rm300d[8][5] __initdata = { { 0, INTD, INTA, INTB, INTC }, /* Slot 4 */ }; -static char irq_tab_rm300e[5][5] __initdata = { +static char irq_tab_rm300e[5][5] = { /* INTA INTB INTC INTD */ { 0, 0, 0, 0, 0 }, /* HOST bridge */ { SCSI, SCSI, SCSI, SCSI, SCSI }, /* SCSI */ @@ -96,7 +96,7 @@ static char irq_tab_rm300e[5][5] __initdata = { #define INTC PCIT_IRQ_INTC #define INTD PCIT_IRQ_INTD -static char irq_tab_pcit[13][5] __initdata = { +static char irq_tab_pcit[13][5] = { /* INTA INTB INTC INTD */ { 0, 0, 0, 0, 0 }, /* HOST bridge */ { SCSI0, SCSI0, SCSI0, SCSI0, SCSI0 }, /* SCSI */ @@ -113,7 +113,7 @@ static char irq_tab_pcit[13][5] __initdata = { { 0, INTA, INTB, INTC, INTD }, /* Slot 5 */ }; -static char irq_tab_pcit_cplus[13][5] __initdata = { +static char irq_tab_pcit_cplus[13][5] = { /* INTA INTB INTC INTD */ { 0, 0, 0, 0, 0 }, /* HOST bridge */ { 0, INTB, INTC, INTD, INTA }, /* PCI Slot 9 */ @@ -130,7 +130,7 @@ static inline int is_rm300_revd(void) return (csmsr & 0xa0) == 0x20; } -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { switch (sni_brd_type) { case SNI_BRD_PCI_TOWER_CPLUS: diff --git a/arch/mips/pci/fixup-tb0219.c b/arch/mips/pci/fixup-tb0219.c index d0b0083fbd27..cc581535f257 100644 --- a/arch/mips/pci/fixup-tb0219.c +++ b/arch/mips/pci/fixup-tb0219.c @@ -23,7 +23,7 @@ #include -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { int irq = -1; diff --git a/arch/mips/pci/fixup-tb0226.c b/arch/mips/pci/fixup-tb0226.c index 4196ccf3ea3d..b827b5cad5fd 100644 --- a/arch/mips/pci/fixup-tb0226.c +++ b/arch/mips/pci/fixup-tb0226.c @@ -23,7 +23,7 @@ #include #include -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { int irq = -1; diff --git a/arch/mips/pci/fixup-tb0287.c b/arch/mips/pci/fixup-tb0287.c index 8c5039ed75d7..98f26285f2e3 100644 --- a/arch/mips/pci/fixup-tb0287.c +++ b/arch/mips/pci/fixup-tb0287.c @@ -22,7 +22,7 @@ #include -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { unsigned char bus; int irq = -1; diff --git a/arch/mips/pci/pci-alchemy.c b/arch/mips/pci/pci-alchemy.c index e99ca7702d8a..f15ec98de2de 100644 --- a/arch/mips/pci/pci-alchemy.c +++ b/arch/mips/pci/pci-alchemy.c @@ -522,7 +522,7 @@ static int __init alchemy_pci_init(void) arch_initcall(alchemy_pci_init); -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { struct alchemy_pci_context *ctx = dev->sysdata; if (ctx && ctx->board_map_irq) diff --git a/arch/mips/pci/pci-bcm47xx.c b/arch/mips/pci/pci-bcm47xx.c index 76f16eaed0ad..230d7dd273e2 100644 --- a/arch/mips/pci/pci-bcm47xx.c +++ b/arch/mips/pci/pci-bcm47xx.c @@ -28,7 +28,7 @@ #include #include -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { return 0; } diff --git a/arch/mips/pci/pci-lasat.c b/arch/mips/pci/pci-lasat.c index 40d2797d2bc4..47f4ee6bbb3b 100644 --- a/arch/mips/pci/pci-lasat.c +++ b/arch/mips/pci/pci-lasat.c @@ -61,7 +61,7 @@ arch_initcall(lasat_pci_setup); #define LASAT_IRQ_PCIC (LASAT_IRQ_BASE + 7) #define LASAT_IRQ_PCID (LASAT_IRQ_BASE + 8) -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { switch (slot) { case 1: diff --git a/arch/mips/pci/pci-mt7620.c b/arch/mips/pci/pci-mt7620.c index 4e633c1e7ff3..90fba9bf98da 100644 --- a/arch/mips/pci/pci-mt7620.c +++ b/arch/mips/pci/pci-mt7620.c @@ -361,7 +361,7 @@ static int mt7620_pci_probe(struct platform_device *pdev) return 0; } -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { u16 cmd; u32 val; diff --git a/arch/mips/pci/pci-octeon.c b/arch/mips/pci/pci-octeon.c index 9ee01936862e..3e92a06fa772 100644 --- a/arch/mips/pci/pci-octeon.c +++ b/arch/mips/pci/pci-octeon.c @@ -59,8 +59,7 @@ union octeon_pci_address { } s; }; -int __initconst (*octeon_pcibios_map_irq)(const struct pci_dev *dev, - u8 slot, u8 pin); +int (*octeon_pcibios_map_irq)(const struct pci_dev *dev, u8 slot, u8 pin); enum octeon_dma_bar_type octeon_dma_bar_type = OCTEON_DMA_BAR_TYPE_INVALID; /** @@ -74,7 +73,7 @@ enum octeon_dma_bar_type octeon_dma_bar_type = OCTEON_DMA_BAR_TYPE_INVALID; * as it goes through each bridge. * Returns Interrupt number for the device */ -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { if (octeon_pcibios_map_irq) return octeon_pcibios_map_irq(dev, slot, pin); diff --git a/arch/mips/pci/pci-rt2880.c b/arch/mips/pci/pci-rt2880.c index d6360fe73d05..711cdccdf65b 100644 --- a/arch/mips/pci/pci-rt2880.c +++ b/arch/mips/pci/pci-rt2880.c @@ -181,7 +181,7 @@ static inline void rt2880_pci_write_u32(unsigned long reg, u32 val) spin_unlock_irqrestore(&rt2880_pci_lock, flags); } -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { u16 cmd; int irq = -1; diff --git a/arch/mips/pci/pci-rt3883.c b/arch/mips/pci/pci-rt3883.c index 04f8ea953297..958899ffe99c 100644 --- a/arch/mips/pci/pci-rt3883.c +++ b/arch/mips/pci/pci-rt3883.c @@ -564,7 +564,7 @@ err_put_intc_node: return err; } -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { return of_irq_parse_and_map_pci(dev, slot, pin); } diff --git a/arch/mips/pci/pci-tx4938.c b/arch/mips/pci/pci-tx4938.c index 000c0e1f9ef8..a6418460e3c4 100644 --- a/arch/mips/pci/pci-tx4938.c +++ b/arch/mips/pci/pci-tx4938.c @@ -112,7 +112,7 @@ int __init tx4938_pciclk66_setup(void) return pciclk; } -int __init tx4938_pcic1_map_irq(const struct pci_dev *dev, u8 slot) +int tx4938_pcic1_map_irq(const struct pci_dev *dev, u8 slot) { if (get_tx4927_pcicptr(dev->bus->sysdata) == tx4938_pcic1ptr) { switch (slot) { diff --git a/arch/mips/pci/pci-tx4939.c b/arch/mips/pci/pci-tx4939.c index 9d6acc00f348..09a65f7dbe7c 100644 --- a/arch/mips/pci/pci-tx4939.c +++ b/arch/mips/pci/pci-tx4939.c @@ -48,7 +48,7 @@ void __init tx4939_report_pci1clk(void) ((pciclk + 50000) / 100000) % 10); } -int __init tx4939_pcic1_map_irq(const struct pci_dev *dev, u8 slot) +int tx4939_pcic1_map_irq(const struct pci_dev *dev, u8 slot) { if (get_tx4927_pcicptr(dev->bus->sysdata) == tx4939_pcic1ptr) { switch (slot) { @@ -68,7 +68,7 @@ int __init tx4939_pcic1_map_irq(const struct pci_dev *dev, u8 slot) return -1; } -int __init tx4939_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int tx4939_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { int irq = tx4939_pcic1_map_irq(dev, slot); diff --git a/arch/mips/pci/pci-xlp.c b/arch/mips/pci/pci-xlp.c index 7babf01600cb..9eff9137f78e 100644 --- a/arch/mips/pci/pci-xlp.c +++ b/arch/mips/pci/pci-xlp.c @@ -205,7 +205,7 @@ int xlp_socdev_to_node(const struct pci_dev *lnkdev) return PCI_SLOT(lnkdev->devfn) / 8; } -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { struct pci_dev *lnkdev; int lnkfunc, node; diff --git a/arch/mips/pci/pci-xlr.c b/arch/mips/pci/pci-xlr.c index 26d2dabef281..2a1c81a129ba 100644 --- a/arch/mips/pci/pci-xlr.c +++ b/arch/mips/pci/pci-xlr.c @@ -315,7 +315,7 @@ static void xls_pcie_ack_b(struct irq_data *d) } } -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { return get_irq_vector(dev); } diff --git a/arch/mips/pci/pcie-octeon.c b/arch/mips/pci/pcie-octeon.c index ad3584dbc9d7..fd2887415bc8 100644 --- a/arch/mips/pci/pcie-octeon.c +++ b/arch/mips/pci/pcie-octeon.c @@ -1464,8 +1464,7 @@ static int cvmx_pcie_rc_initialize(int pcie_port) * as it goes through each bridge. * Returns Interrupt number for the device */ -int __init octeon_pcie_pcibios_map_irq(const struct pci_dev *dev, - u8 slot, u8 pin) +int octeon_pcie_pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { /* * The EBH5600 board with the PCI to PCIe bridge mistakenly diff --git a/arch/mips/txx9/generic/pci.c b/arch/mips/txx9/generic/pci.c index 0bd2a1e1ff9a..fb998726bd5d 100644 --- a/arch/mips/txx9/generic/pci.c +++ b/arch/mips/txx9/generic/pci.c @@ -386,9 +386,10 @@ int pcibios_plat_dev_init(struct pci_dev *dev) return 0; } -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +static int (*txx9_pci_map_irq)(const struct pci_dev *dev, u8 slot, u8 pin); +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { - return txx9_board_vec->pci_map_irq(dev, slot, pin); + return txx9_pci_map_irq(dev, slot, pin); } char * (*txx9_board_pcibios_setup)(char *str) __initdata; @@ -424,5 +425,8 @@ char *__init txx9_pcibios_setup(char *str) txx9_pci_err_action = TXX9_PCI_ERR_IGNORE; return NULL; } + + txx9_pci_map_irq = txx9_board_vec->pci_map_irq; + return str; } From 9e987b70ada27554c5d176421de1d167218c49b5 Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Fri, 15 Sep 2017 17:35:27 -0700 Subject: [PATCH 104/279] ACPI / bus: Make ACPI_HANDLE() work for non-GPL code again Due to commit db3e50f3234b (device property: Get rid of struct fwnode_handle type field), ACPI_HANDLE() inadvertently became a GPL-only call. The call path that led to that was: ACPI_HANDLE() ACPI_COMPANION() to_acpi_device_node() is_acpi_device_node() acpi_device_fwnode_ops DECLARE_ACPI_FWNODE_OPS(acpi_device_fwnode_ops); ...and the new DECLARE_ACPI_FWNODE_OPS() includes EXPORT_SYMBOL_GPL, whereas previously it was a static struct. In order to avoid changing any of that, let's instead provide ever so slightly better encapsulation of those struct fwnode_operations instances. Those do not really need to be directly used in inline function calls in header files. Simply moving two small functions (is_acpi_device_node and is_acpi_data_node) out of acpi_bus.h, and into a .c file, does that. That leaves the internals of struct fwnode_operations as GPL-only (which I think was the intent all along), but un-breaks any driver code out there that relies on the ACPI subsystem's being (historically) an EXPORT_SYMBOL-usable system. By that, I mean, ACPI_HANDLE() and other basic ACPI calls were non-GPL-protected. Also, while I'm there, remove a tiny bit of redundancy that was missed in the earlier commit, by having is_acpi_node() use the other two routines, instead of checking fwnode directly. Fixes: db3e50f3234b (device property: Get rid of struct fwnode_handle type field) Signed-off-by: John Hubbard Acked-by: Sakari Ailus Acked-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- drivers/acpi/property.c | 13 +++++++++++++ include/acpi/acpi_bus.h | 18 ++++-------------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index c1c216163de3..1e3c2517a1ac 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -1293,3 +1293,16 @@ static int acpi_fwnode_graph_parse_endpoint(const struct fwnode_handle *fwnode, DECLARE_ACPI_FWNODE_OPS(acpi_device_fwnode_ops); DECLARE_ACPI_FWNODE_OPS(acpi_data_fwnode_ops); const struct fwnode_operations acpi_static_fwnode_ops; + +bool is_acpi_device_node(const struct fwnode_handle *fwnode) +{ + return !IS_ERR_OR_NULL(fwnode) && + fwnode->ops == &acpi_device_fwnode_ops; +} +EXPORT_SYMBOL(is_acpi_device_node); + +bool is_acpi_data_node(const struct fwnode_handle *fwnode) +{ + return !IS_ERR_OR_NULL(fwnode) && fwnode->ops == &acpi_data_fwnode_ops; +} +EXPORT_SYMBOL(is_acpi_data_node); diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index dedf9d789166..fa1505292f6c 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -399,17 +399,12 @@ extern const struct fwnode_operations acpi_device_fwnode_ops; extern const struct fwnode_operations acpi_data_fwnode_ops; extern const struct fwnode_operations acpi_static_fwnode_ops; +bool is_acpi_device_node(const struct fwnode_handle *fwnode); +bool is_acpi_data_node(const struct fwnode_handle *fwnode); + static inline bool is_acpi_node(const struct fwnode_handle *fwnode) { - return !IS_ERR_OR_NULL(fwnode) && - (fwnode->ops == &acpi_device_fwnode_ops - || fwnode->ops == &acpi_data_fwnode_ops); -} - -static inline bool is_acpi_device_node(const struct fwnode_handle *fwnode) -{ - return !IS_ERR_OR_NULL(fwnode) && - fwnode->ops == &acpi_device_fwnode_ops; + return (is_acpi_device_node(fwnode) || is_acpi_data_node(fwnode)); } #define to_acpi_device_node(__fwnode) \ @@ -422,11 +417,6 @@ static inline bool is_acpi_device_node(const struct fwnode_handle *fwnode) NULL; \ }) -static inline bool is_acpi_data_node(const struct fwnode_handle *fwnode) -{ - return !IS_ERR_OR_NULL(fwnode) && fwnode->ops == &acpi_data_fwnode_ops; -} - #define to_acpi_data_node(__fwnode) \ ({ \ typeof(__fwnode) __to_acpi_data_node_fwnode = __fwnode; \ From 6073512cc8e2c48bed5c6625c02c5e4ae50cec34 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Mon, 18 Sep 2017 14:59:20 +0200 Subject: [PATCH 105/279] net: phy: Kconfig: Fix PHY infrastructure menu in menuconfig Since the integration of PHYLINK, the configuration option which used to be under the PHY infrastructure menu in menuconfig ended up one level up (the network device driver section) By placing PHYLINK option right after PHYLIB entry, it broke the way Kconfig used to build the menu. See kconfig-language.txt, section "Menu structure", 2nd method. This is fixed by placing the PHYLINK option just before PHYLIB. Fixes: 9525ae83959b ("phylink: add phylink infrastructure") Signed-off-by: Jerome Brunet Signed-off-by: David S. Miller --- drivers/net/phy/Kconfig | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index a9d16a3af514..cd931cf9dcc2 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -160,15 +160,6 @@ config MDIO_XGENE endif -menuconfig PHYLIB - tristate "PHY Device support and infrastructure" - depends on NETDEVICES - select MDIO_DEVICE - help - Ethernet controllers are usually attached to PHY - devices. This option provides infrastructure for - managing PHY devices. - config PHYLINK tristate depends on NETDEVICES @@ -179,6 +170,15 @@ config PHYLINK configuration links, PHYs, and Serdes links with MAC level autonegotiation modes. +menuconfig PHYLIB + tristate "PHY Device support and infrastructure" + depends on NETDEVICES + select MDIO_DEVICE + help + Ethernet controllers are usually attached to PHY + devices. This option provides infrastructure for + managing PHY devices. + if PHYLIB config SWPHY From b247c211ee367cfe975dd54e381094083adfd8d3 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 19 Sep 2017 02:43:13 +0200 Subject: [PATCH 106/279] PM: docs: Drop an excess character from devices.rst Drop an excess "`" from Documentation/driver-api/pm/devices.rst. Fixes: 2728b2d2e5be (PM / core / docs: Convert sleep states API document to reST) Signed-off-by: Rafael J. Wysocki --- Documentation/driver-api/pm/devices.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/driver-api/pm/devices.rst b/Documentation/driver-api/pm/devices.rst index bedd32388dac..a0dc2879a152 100644 --- a/Documentation/driver-api/pm/devices.rst +++ b/Documentation/driver-api/pm/devices.rst @@ -675,7 +675,7 @@ sub-domain of the parent domain. Support for power domains is provided through the :c:member:`pm_domain` field of |struct device|. This field is a pointer to an object of type -|struct dev_pm_domain|, defined in :file:`include/linux/pm.h``, providing a set +|struct dev_pm_domain|, defined in :file:`include/linux/pm.h`, providing a set of power management callbacks analogous to the subsystem-level and device driver callbacks that are executed for the given device during all power transitions, instead of the respective subsystem-level callbacks. Specifically, if a From 157c460e10cb6eca29ccbd0f023db159d0c55ec7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 19 Sep 2017 02:22:39 +0200 Subject: [PATCH 107/279] PM: core: Fix device_pm_check_callbacks() The device_pm_check_callbacks() function doesn't check legacy ->suspend and ->resume callback pointers under the device's bus type, class and driver, so in some cases it may set the no_pm_callbacks flag for the device incorrectly and then the callbacks may be skipped during system suspend/resume, which shouldn't happen. Fixes: aa8e54b55947 (PM / sleep: Go direct_complete if driver has no callbacks) Signed-off-by: Rafael J. Wysocki Cc: 4.5+ # 4.5+ --- drivers/base/power/main.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index ea1732ed7a9d..770b1539a083 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1860,10 +1860,13 @@ void device_pm_check_callbacks(struct device *dev) { spin_lock_irq(&dev->power.lock); dev->power.no_pm_callbacks = - (!dev->bus || pm_ops_is_empty(dev->bus->pm)) && - (!dev->class || pm_ops_is_empty(dev->class->pm)) && + (!dev->bus || (pm_ops_is_empty(dev->bus->pm) && + !dev->bus->suspend && !dev->bus->resume)) && + (!dev->class || (pm_ops_is_empty(dev->class->pm) && + !dev->class->suspend && !dev->class->resume)) && (!dev->type || pm_ops_is_empty(dev->type->pm)) && (!dev->pm_domain || pm_ops_is_empty(&dev->pm_domain->ops)) && - (!dev->driver || pm_ops_is_empty(dev->driver->pm)); + (!dev->driver || (pm_ops_is_empty(dev->driver->pm) && + !dev->driver->suspend && !dev->driver->resume)); spin_unlock_irq(&dev->power.lock); } From ff76898c0a14a43f71bfe63dd1903087e96ce238 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 19 Sep 2017 08:23:22 -0700 Subject: [PATCH 108/279] cpufreq: dt-platdev: Add some missing platforms to the blacklist Commit edeec420de24 (cpufreq: dt-platdev: Automatically create cpufreq device with OPP v2) missed adding few platforms to the blacklist which create the cpufreq-dt device from their own drivers, after some dependencies are sorted out. And for those platforms, both the platform specific driver and the cpufreq-dt-platdev driver try to create the cpufreq-dt device now. Fix that by including those platforms in the blacklist. This doesn't include the TI platforms, for which there is a separate patch. Fixes: edeec420de24 (cpufreq: dt-cpufreq: platdev Automatically create device with OPP v2) Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq-dt-platdev.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index a020da7940d6..430edadca527 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -106,6 +106,18 @@ static const struct of_device_id whitelist[] __initconst = { * platforms using "operating-points-v2" property. */ static const struct of_device_id blacklist[] __initconst = { + { .compatible = "calxeda,highbank", }, + { .compatible = "calxeda,ecx-2000", }, + + { .compatible = "marvell,armadaxp", }, + + { .compatible = "nvidia,tegra124", }, + + { .compatible = "st,stih407", }, + { .compatible = "st,stih410", }, + + { .compatible = "sigma,tango4", }, + { } }; From ed40fad9a568efe83f5e80897ded71117b6911b3 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Thu, 31 Aug 2017 22:24:36 +0200 Subject: [PATCH 109/279] ARM: cpuidle: Avoid memleak if init fail In case there are no DT idle states defined or cpuidle_register_driver() fails, the copy of the idle driver is leaked: unreferenced object 0xede0dc00 (size 1024): comm "swapper/0", pid 1, jiffies 4294937431 (age 744.510s) hex dump (first 32 bytes): 94 9e 0b c1 00 00 00 00 00 00 00 00 00 00 00 00 ................ 57 46 49 00 00 00 00 00 00 00 00 00 00 00 00 00 WFI............. backtrace: [] arm_idle_init+0x44/0x1ac [] do_one_initcall+0x3c/0x16c [] kernel_init_freeable+0x110/0x1d0 [] kernel_init+0x8/0x114 [] ret_from_fork+0x14/0x3c So fix this by freeing the unregistered copy in error case. Signed-off-by: Stefan Wahren Fixes: d50a7d8acd78 (ARM: cpuidle: Support asymmetric idle definition) Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle-arm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/cpuidle/cpuidle-arm.c b/drivers/cpuidle/cpuidle-arm.c index 7080c384ad5d..52a75053ee03 100644 --- a/drivers/cpuidle/cpuidle-arm.c +++ b/drivers/cpuidle/cpuidle-arm.c @@ -104,13 +104,13 @@ static int __init arm_idle_init(void) ret = dt_init_idle_driver(drv, arm_idle_state_match, 1); if (ret <= 0) { ret = ret ? : -ENODEV; - goto out_fail; + goto init_fail; } ret = cpuidle_register_driver(drv); if (ret) { pr_err("Failed to register cpuidle driver\n"); - goto out_fail; + goto init_fail; } /* @@ -149,6 +149,8 @@ static int __init arm_idle_init(void) } return 0; +init_fail: + kfree(drv); out_fail: while (--cpu >= 0) { dev = per_cpu(cpuidle_devices, cpu); From 0c0bceb796878a5baea1f47033215fac0774e498 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Fri, 8 Sep 2017 12:24:41 +0300 Subject: [PATCH 110/279] ACPI: properties: Return _DSD hierarchical extension (data) sub-nodes correctly The recently merged patch "ACPI: Prepare for constifying acpi_get_next_subnode() fwnode argument" was part of a patchset constifying the fwnode arguments across the fwnode property API. The purpose of the patch was to allow returning non-const fwnodes from a data structure the root of which is const. Unfortunately the patch introduced the functionality, in particular when starting parsed from an ACPI device node, the hierarchical data extension nodes would not be enumerated. Restore the old behaviour while still retaining constness properties of the patch. Fixes: 01c1da289791 "ACPI: Prepare for constifying acpi_get_next_subnode() fwnode argument" Signed-off-by: Sakari Ailus Acked-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- drivers/acpi/property.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index c1c216163de3..265b74f440b6 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -908,11 +908,12 @@ struct fwnode_handle *acpi_get_next_subnode(const struct fwnode_handle *fwnode, struct fwnode_handle *child) { const struct acpi_device *adev = to_acpi_device_node(fwnode); - struct acpi_device *child_adev = NULL; const struct list_head *head; struct list_head *next; if (!child || is_acpi_device_node(child)) { + struct acpi_device *child_adev; + if (adev) head = &adev->children; else @@ -922,8 +923,8 @@ struct fwnode_handle *acpi_get_next_subnode(const struct fwnode_handle *fwnode, goto nondev; if (child) { - child_adev = to_acpi_device_node(child); - next = child_adev->node.next; + adev = to_acpi_device_node(child); + next = adev->node.next; if (next == head) { child = NULL; goto nondev; @@ -941,8 +942,8 @@ struct fwnode_handle *acpi_get_next_subnode(const struct fwnode_handle *fwnode, const struct acpi_data_node *data = to_acpi_data_node(fwnode); struct acpi_data_node *dn; - if (child_adev) - head = &child_adev->data.subnodes; + if (adev) + head = &adev->data.subnodes; else if (data) head = &data->data.subnodes; else From 0647169cf9aa441700eb8f23ea49be060626534b Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 19 Sep 2017 12:41:37 +0200 Subject: [PATCH 111/279] rhashtable: Documentation tweak Clarify that rhashtable_walk_{stop,start} will not reset the iterator to the beginning of the hash table. Confusion between rhashtable_walk_enter and rhashtable_walk_start has already lead to a bug. Signed-off-by: Andreas Gruenbacher Signed-off-by: David S. Miller --- lib/rhashtable.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 707ca5d677c6..ddd7dde87c3c 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -735,9 +735,9 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_exit); * rhashtable_walk_start - Start a hash table walk * @iter: Hash table iterator * - * Start a hash table walk. Note that we take the RCU lock in all - * cases including when we return an error. So you must always call - * rhashtable_walk_stop to clean up. + * Start a hash table walk at the current iterator position. Note that we take + * the RCU lock in all cases including when we return an error. So you must + * always call rhashtable_walk_stop to clean up. * * Returns zero if successful. * @@ -846,7 +846,8 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_next); * rhashtable_walk_stop - Finish a hash table walk * @iter: Hash table iterator * - * Finish a hash table walk. + * Finish a hash table walk. Does not reset the iterator to the start of the + * hash table. */ void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU) From 75df6e688ccd517e339a7c422ef7ad73045b18a2 Mon Sep 17 00:00:00 2001 From: Tahsin Erdogan Date: Sun, 17 Sep 2017 03:23:48 -0700 Subject: [PATCH 112/279] tracing: Fix trace_pipe behavior for instance traces When reading data from trace_pipe, tracing_wait_pipe() performs a check to see if tracing has been turned off after some data was read. Currently, this check always looks at global trace state, but it should be checking the trace instance where trace_pipe is located at. Because of this bug, cat instances/i1/trace_pipe in the following script will immediately exit instead of waiting for data: cd /sys/kernel/debug/tracing echo 0 > tracing_on mkdir -p instances/i1 echo 1 > instances/i1/tracing_on echo 1 > instances/i1/events/sched/sched_process_exec/enable cat instances/i1/trace_pipe Link: http://lkml.kernel.org/r/20170917102348.1615-1-tahsin@google.com Cc: stable@vger.kernel.org Fixes: 10246fa35d4f ("tracing: give easy way to clear trace buffer") Signed-off-by: Tahsin Erdogan Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d3ca35f38803..752e5daf0896 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5680,7 +5680,7 @@ static int tracing_wait_pipe(struct file *filp) * * iter->pos will be 0 if we haven't read anything. */ - if (!tracing_is_on() && iter->pos) + if (!tracer_tracing_is_on(iter->tr) && iter->pos) break; mutex_unlock(&iter->mutex); From 930651a75bf1ba6893a8b8475270664ebdb6cf4a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 Sep 2017 09:15:59 -0700 Subject: [PATCH 113/279] bpf: do not disable/enable BH in bpf_map_free_id() syzkaller reported following splat [1] Since hard irq are disabled by the caller, bpf_map_free_id() should not try to enable/disable BH. Another solution would be to change htab_map_delete_elem() to defer the free_htab_elem() call after raw_spin_unlock_irqrestore(&b->lock, flags), but this might be not enough to cover other code paths. [1] WARNING: CPU: 1 PID: 8052 at kernel/softirq.c:161 __local_bh_enable_ip +0x1e/0x160 kernel/softirq.c:161 Kernel panic - not syncing: panic_on_warn set ... CPU: 1 PID: 8052 Comm: syz-executor1 Not tainted 4.13.0-next-20170915+ #23 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:52 panic+0x1e4/0x417 kernel/panic.c:181 __warn+0x1c4/0x1d9 kernel/panic.c:542 report_bug+0x211/0x2d0 lib/bug.c:183 fixup_bug+0x40/0x90 arch/x86/kernel/traps.c:178 do_trap_no_signal arch/x86/kernel/traps.c:212 [inline] do_trap+0x260/0x390 arch/x86/kernel/traps.c:261 do_error_trap+0x120/0x390 arch/x86/kernel/traps.c:298 do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:311 invalid_op+0x18/0x20 arch/x86/entry/entry_64.S:905 RIP: 0010:__local_bh_enable_ip+0x1e/0x160 kernel/softirq.c:161 RSP: 0018:ffff8801cdcd7748 EFLAGS: 00010046 RAX: 0000000000000082 RBX: 0000000000000201 RCX: 0000000000000000 RDX: 1ffffffff0b5933c RSI: 0000000000000201 RDI: ffffffff85ac99e0 RBP: ffff8801cdcd7758 R08: ffffffff85b87158 R09: 1ffff10039b9aec6 R10: ffff8801c99f24c0 R11: 0000000000000002 R12: ffffffff817b0b47 R13: dffffc0000000000 R14: ffff8801cdcd77e8 R15: 0000000000000001 __raw_spin_unlock_bh include/linux/spinlock_api_smp.h:176 [inline] _raw_spin_unlock_bh+0x30/0x40 kernel/locking/spinlock.c:207 spin_unlock_bh include/linux/spinlock.h:361 [inline] bpf_map_free_id kernel/bpf/syscall.c:197 [inline] __bpf_map_put+0x267/0x320 kernel/bpf/syscall.c:227 bpf_map_put+0x1a/0x20 kernel/bpf/syscall.c:235 bpf_map_fd_put_ptr+0x15/0x20 kernel/bpf/map_in_map.c:96 free_htab_elem+0xc3/0x1b0 kernel/bpf/hashtab.c:658 htab_map_delete_elem+0x74d/0x970 kernel/bpf/hashtab.c:1063 map_delete_elem kernel/bpf/syscall.c:633 [inline] SYSC_bpf kernel/bpf/syscall.c:1479 [inline] SyS_bpf+0x2188/0x46a0 kernel/bpf/syscall.c:1451 entry_SYSCALL_64_fastpath+0x1f/0xbe Fixes: f3f1c054c288 ("bpf: Introduce bpf_map ID") Signed-off-by: Eric Dumazet Cc: Martin KaFai Lau Acked-by: Martin KaFai Lau Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- kernel/bpf/syscall.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index cb17e1cd1d43..25d074920a00 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -186,15 +186,17 @@ static int bpf_map_alloc_id(struct bpf_map *map) static void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock) { + unsigned long flags; + if (do_idr_lock) - spin_lock_bh(&map_idr_lock); + spin_lock_irqsave(&map_idr_lock, flags); else __acquire(&map_idr_lock); idr_remove(&map_idr, map->id); if (do_idr_lock) - spin_unlock_bh(&map_idr_lock); + spin_unlock_irqrestore(&map_idr_lock, flags); else __release(&map_idr_lock); } From 039cc1c1eb971ffe1973fddb8769a80fd4950a6f Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Tue, 19 Sep 2017 15:06:13 -0500 Subject: [PATCH 114/279] cpufreq: ti-cpufreq: Support additional am43xx platforms Rather than letting the ti-cpufreq driver match against 'ti,am4372' machine compatible during probe let's match against 'ti,am43' so that we can support both 'ti,am4372' and 'ti,am438x' platforms which both match to this compatible. Signed-off-by: Dave Gerlach Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/ti-cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/ti-cpufreq.c b/drivers/cpufreq/ti-cpufreq.c index b29cd3398463..4bf47de6101f 100644 --- a/drivers/cpufreq/ti-cpufreq.c +++ b/drivers/cpufreq/ti-cpufreq.c @@ -190,7 +190,7 @@ static int ti_cpufreq_setup_syscon_register(struct ti_cpufreq_data *opp_data) static const struct of_device_id ti_cpufreq_of_match[] = { { .compatible = "ti,am33xx", .data = &am3x_soc_data, }, - { .compatible = "ti,am4372", .data = &am4x_soc_data, }, + { .compatible = "ti,am43", .data = &am4x_soc_data, }, { .compatible = "ti,dra7", .data = &dra7_soc_data }, {}, }; From 2a4776e14f7da3d48fffb4edbb82355742f23478 Mon Sep 17 00:00:00 2001 From: Lipeng Date: Tue, 19 Sep 2017 17:17:10 +0100 Subject: [PATCH 115/279] net: hns3: Fixes initialization of phy address from firmware Default phy address of every port is 0. Therefore, phy address for each port need to be fetched from firmware and device initialized with fetched non-default phy address. Fixes: 6427264ef330 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support") Signed-off-by: Lipeng Signed-off-by: Salil Mehta Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index bb45365fb817..db4e07dac29a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -1066,6 +1066,7 @@ static int hclge_configure(struct hclge_dev *hdev) for (i = 0; i < ETH_ALEN; i++) hdev->hw.mac.mac_addr[i] = cfg.mac_addr[i]; hdev->hw.mac.media_type = cfg.media_type; + hdev->hw.mac.phy_addr = cfg.phy_addr; hdev->num_desc = cfg.tqp_desc_num; hdev->tm_info.num_pg = 1; hdev->tm_info.num_tc = cfg.tc_num; From c5b1b97522ef32d2170c9aa1a0c1eec179acbb3a Mon Sep 17 00:00:00 2001 From: Lipeng Date: Tue, 19 Sep 2017 17:17:11 +0100 Subject: [PATCH 116/279] net: hns3: Fixes the command used to unmap ring from vector This patch fixes the IMP command being used to unmap the vector from the corresponding ring. Fixes: 6427264ef330 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support") Signed-off-by: Lipeng Signed-off-by: Salil Mehta Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index db4e07dac29a..e324bc6e9f4f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -2779,7 +2779,7 @@ static int hclge_unmap_ring_from_vector( } i = 0; hclge_cmd_setup_basic_desc(&desc, - HCLGE_OPC_ADD_RING_TO_VECTOR, + HCLGE_OPC_DEL_RING_TO_VECTOR, false); req->int_vector_id = vector_id; } From 0305b443a3ba5b84aa474786026df04a70460135 Mon Sep 17 00:00:00 2001 From: Lipeng Date: Tue, 19 Sep 2017 17:17:12 +0100 Subject: [PATCH 117/279] net: hns3: Fixes ring-to-vector map-and-unmap command This patch fixes the vector-to-ring map and unmap command and adds INT_GL(for, Gap Limiting Interrupts) and VF id to it as required by the hardware interface. Fixes: 6427264ef330 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support") Signed-off-by: Lipeng Signed-off-by: Mingguang Qu Signed-off-by: Salil Mehta Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h | 8 ++++++-- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 8 ++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index 91ae0135ee50..c2b613b40509 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -238,7 +238,7 @@ struct hclge_tqp_map { u8 rsv[18]; }; -#define HCLGE_VECTOR_ELEMENTS_PER_CMD 11 +#define HCLGE_VECTOR_ELEMENTS_PER_CMD 10 enum hclge_int_type { HCLGE_INT_TX, @@ -252,8 +252,12 @@ struct hclge_ctrl_vector_chain { #define HCLGE_INT_TYPE_S 0 #define HCLGE_INT_TYPE_M 0x3 #define HCLGE_TQP_ID_S 2 -#define HCLGE_TQP_ID_M (0x3fff << HCLGE_TQP_ID_S) +#define HCLGE_TQP_ID_M (0x7ff << HCLGE_TQP_ID_S) +#define HCLGE_INT_GL_IDX_S 13 +#define HCLGE_INT_GL_IDX_M (0x3 << HCLGE_INT_GL_IDX_S) __le16 tqp_type_and_id[HCLGE_VECTOR_ELEMENTS_PER_CMD]; + u8 vfid; + u8 rsv; }; #define HCLGE_TC_NUM 8 diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index e324bc6e9f4f..eafd9c678162 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -2680,7 +2680,11 @@ int hclge_map_vport_ring_to_vector(struct hclge_vport *vport, int vector_id, hnae_get_bit(node->flag, HNAE3_RING_TYPE_B)); hnae_set_field(req->tqp_type_and_id[i], HCLGE_TQP_ID_M, HCLGE_TQP_ID_S, node->tqp_index); + hnae_set_field(req->tqp_type_and_id[i], HCLGE_INT_GL_IDX_M, + HCLGE_INT_GL_IDX_S, + hnae_get_bit(node->flag, HNAE3_RING_TYPE_B)); req->tqp_type_and_id[i] = cpu_to_le16(req->tqp_type_and_id[i]); + req->vfid = vport->vport_id; if (++i >= HCLGE_VECTOR_ELEMENTS_PER_CMD) { req->int_cause_num = HCLGE_VECTOR_ELEMENTS_PER_CMD; @@ -2764,8 +2768,12 @@ static int hclge_unmap_ring_from_vector( hnae_get_bit(node->flag, HNAE3_RING_TYPE_B)); hnae_set_field(req->tqp_type_and_id[i], HCLGE_TQP_ID_M, HCLGE_TQP_ID_S, node->tqp_index); + hnae_set_field(req->tqp_type_and_id[i], HCLGE_INT_GL_IDX_M, + HCLGE_INT_GL_IDX_S, + hnae_get_bit(node->flag, HNAE3_RING_TYPE_B)); req->tqp_type_and_id[i] = cpu_to_le16(req->tqp_type_and_id[i]); + req->vfid = vport->vport_id; if (++i >= HCLGE_VECTOR_ELEMENTS_PER_CMD) { req->int_cause_num = HCLGE_VECTOR_ELEMENTS_PER_CMD; From 139e8792537b327252a9676591a78e6408d50a85 Mon Sep 17 00:00:00 2001 From: Lipeng Date: Tue, 19 Sep 2017 17:17:13 +0100 Subject: [PATCH 118/279] net: hns3: Fixes the initialization of MAC address in hardware This patch fixes the initialization of MAC address, fetched from HNS3 firmware i.e. when it is not randomly generated, to the HNS3 hardware. Fixes: ca60906d2795 ("net: hns3: Add support of HNS3 Ethernet Driver for hip08 SoC") Signed-off-by: Lipeng Signed-off-by: Salil Mehta Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c index 1c3e29447891..4d68d6ea5143 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c @@ -2705,10 +2705,11 @@ static void hns3_init_mac_addr(struct net_device *netdev) eth_hw_addr_random(netdev); dev_warn(priv->dev, "using random MAC address %pM\n", netdev->dev_addr); - /* Also copy this new MAC address into hdev */ - if (h->ae_algo->ops->set_mac_addr) - h->ae_algo->ops->set_mac_addr(h, netdev->dev_addr); } + + if (h->ae_algo->ops->set_mac_addr) + h->ae_algo->ops->set_mac_addr(h, netdev->dev_addr); + } static void hns3_nic_set_priv_ops(struct net_device *netdev) From fbbb1536b220eb4f4f95cbceae6579489a8adad5 Mon Sep 17 00:00:00 2001 From: Salil Mehta Date: Tue, 19 Sep 2017 17:17:14 +0100 Subject: [PATCH 119/279] net: hns3: Fixes the ether address copy with appropriate API This patch replaces the ethernet address copy instance with more appropriate ether_addr_copy() function. Fixes: 6427264ef330 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support") Signed-off-by: Salil Mehta Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index eafd9c678162..8e172afd4876 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -1063,8 +1063,7 @@ static int hclge_configure(struct hclge_dev *hdev) hdev->base_tqp_pid = 0; hdev->rss_size_max = 1; hdev->rx_buf_len = cfg.rx_buf_len; - for (i = 0; i < ETH_ALEN; i++) - hdev->hw.mac.mac_addr[i] = cfg.mac_addr[i]; + ether_addr_copy(hdev->hw.mac.mac_addr, cfg.mac_addr); hdev->hw.mac.media_type = cfg.media_type; hdev->hw.mac.phy_addr = cfg.phy_addr; hdev->num_desc = cfg.tqp_desc_num; From 5e43aef8491ae3b5feb79cd15260faf39303ef33 Mon Sep 17 00:00:00 2001 From: Lipeng Date: Tue, 19 Sep 2017 17:17:15 +0100 Subject: [PATCH 120/279] net: hns3: Fixes the default VLAN-id of PF When there is no vlan id in the packets, hardware will treat the vlan id as 0 and look for the mac_vlan table. This patch set the default vlan id of PF as 0. Without this config, it will fail when look for mac_vlan table, and hardware will drop packets. Fixes: 6427264ef330 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support") Signed-off-by: Mingguang Qu Signed-off-by: Lipeng Signed-off-by: Salil Mehta Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 8e172afd4876..74008ef23169 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -3673,6 +3673,7 @@ static int hclge_init_vlan_config(struct hclge_dev *hdev) { #define HCLGE_VLAN_TYPE_VF_TABLE 0 #define HCLGE_VLAN_TYPE_PORT_TABLE 1 + struct hnae3_handle *handle; int ret; ret = hclge_set_vlan_filter_ctrl(hdev, HCLGE_VLAN_TYPE_VF_TABLE, @@ -3682,8 +3683,11 @@ static int hclge_init_vlan_config(struct hclge_dev *hdev) ret = hclge_set_vlan_filter_ctrl(hdev, HCLGE_VLAN_TYPE_PORT_TABLE, true); + if (ret) + return ret; - return ret; + handle = &hdev->vport[0].nic; + return hclge_set_port_vlan_filter(handle, htons(ETH_P_8021Q), 0, false); } static int hclge_set_mtu(struct hnae3_handle *handle, int new_mtu) From 90f7b11a5a0081feb7041fcc795c9a131a62a725 Mon Sep 17 00:00:00 2001 From: Lipeng Date: Tue, 19 Sep 2017 17:17:16 +0100 Subject: [PATCH 121/279] net: hns3: Fixes the premature exit of loop when matching clients When register/unregister ae_dev, ae_dev should match all client in the client_list. Enet and roce can co-exists together so we should continue checking for enet and roce presence together. So break should not be there. Above caused problems in loading and unloading of modules. Fixes: 38eddd126772 ("net: hns3: Add support of the HNAE3 framework") Signed-off-by: Lipeng Signed-off-by: Salil Mehta Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hnae3.c | 43 +++++---------------- 1 file changed, 9 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.c b/drivers/net/ethernet/hisilicon/hns3/hnae3.c index 59efbd605416..5bcb2238acb2 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.c +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.c @@ -37,20 +37,15 @@ static bool hnae3_client_match(enum hnae3_client_type client_type, } static int hnae3_match_n_instantiate(struct hnae3_client *client, - struct hnae3_ae_dev *ae_dev, - bool is_reg, bool *matched) + struct hnae3_ae_dev *ae_dev, bool is_reg) { int ret; - *matched = false; - /* check if this client matches the type of ae_dev */ if (!(hnae3_client_match(client->type, ae_dev->dev_type) && hnae_get_bit(ae_dev->flag, HNAE3_DEV_INITED_B))) { return 0; } - /* there is a match of client and dev */ - *matched = true; /* now, (un-)instantiate client by calling lower layer */ if (is_reg) { @@ -69,7 +64,6 @@ int hnae3_register_client(struct hnae3_client *client) { struct hnae3_client *client_tmp; struct hnae3_ae_dev *ae_dev; - bool matched; int ret = 0; mutex_lock(&hnae3_common_lock); @@ -86,7 +80,7 @@ int hnae3_register_client(struct hnae3_client *client) /* if the client could not be initialized on current port, for * any error reasons, move on to next available port */ - ret = hnae3_match_n_instantiate(client, ae_dev, true, &matched); + ret = hnae3_match_n_instantiate(client, ae_dev, true); if (ret) dev_err(&ae_dev->pdev->dev, "match and instantiation failed for port\n"); @@ -102,12 +96,11 @@ EXPORT_SYMBOL(hnae3_register_client); void hnae3_unregister_client(struct hnae3_client *client) { struct hnae3_ae_dev *ae_dev; - bool matched; mutex_lock(&hnae3_common_lock); /* un-initialize the client on every matched port */ list_for_each_entry(ae_dev, &hnae3_ae_dev_list, node) { - hnae3_match_n_instantiate(client, ae_dev, false, &matched); + hnae3_match_n_instantiate(client, ae_dev, false); } list_del(&client->node); @@ -124,7 +117,6 @@ int hnae3_register_ae_algo(struct hnae3_ae_algo *ae_algo) const struct pci_device_id *id; struct hnae3_ae_dev *ae_dev; struct hnae3_client *client; - bool matched; int ret = 0; mutex_lock(&hnae3_common_lock); @@ -151,13 +143,10 @@ int hnae3_register_ae_algo(struct hnae3_ae_algo *ae_algo) * initialize the figure out client instance */ list_for_each_entry(client, &hnae3_client_list, node) { - ret = hnae3_match_n_instantiate(client, ae_dev, true, - &matched); + ret = hnae3_match_n_instantiate(client, ae_dev, true); if (ret) dev_err(&ae_dev->pdev->dev, "match and instantiation failed\n"); - if (matched) - break; } } @@ -175,7 +164,6 @@ void hnae3_unregister_ae_algo(struct hnae3_ae_algo *ae_algo) const struct pci_device_id *id; struct hnae3_ae_dev *ae_dev; struct hnae3_client *client; - bool matched; mutex_lock(&hnae3_common_lock); /* Check if there are matched ae_dev */ @@ -187,12 +175,8 @@ void hnae3_unregister_ae_algo(struct hnae3_ae_algo *ae_algo) /* check the client list for the match with this ae_dev type and * un-initialize the figure out client instance */ - list_for_each_entry(client, &hnae3_client_list, node) { - hnae3_match_n_instantiate(client, ae_dev, false, - &matched); - if (matched) - break; - } + list_for_each_entry(client, &hnae3_client_list, node) + hnae3_match_n_instantiate(client, ae_dev, false); ae_algo->ops->uninit_ae_dev(ae_dev); hnae_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 0); @@ -212,7 +196,6 @@ int hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev) const struct pci_device_id *id; struct hnae3_ae_algo *ae_algo; struct hnae3_client *client; - bool matched; int ret = 0; mutex_lock(&hnae3_common_lock); @@ -246,13 +229,10 @@ int hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev) * initialize the figure out client instance */ list_for_each_entry(client, &hnae3_client_list, node) { - ret = hnae3_match_n_instantiate(client, ae_dev, true, - &matched); + ret = hnae3_match_n_instantiate(client, ae_dev, true); if (ret) dev_err(&ae_dev->pdev->dev, "match and instantiation failed\n"); - if (matched) - break; } out_err: @@ -270,7 +250,6 @@ void hnae3_unregister_ae_dev(struct hnae3_ae_dev *ae_dev) const struct pci_device_id *id; struct hnae3_ae_algo *ae_algo; struct hnae3_client *client; - bool matched; mutex_lock(&hnae3_common_lock); /* Check if there are matched ae_algo */ @@ -279,12 +258,8 @@ void hnae3_unregister_ae_dev(struct hnae3_ae_dev *ae_dev) if (!id) continue; - list_for_each_entry(client, &hnae3_client_list, node) { - hnae3_match_n_instantiate(client, ae_dev, false, - &matched); - if (matched) - break; - } + list_for_each_entry(client, &hnae3_client_list, node) + hnae3_match_n_instantiate(client, ae_dev, false); ae_algo->ops->uninit_ae_dev(ae_dev); hnae_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 0); From b5b7db8d680464b1d631fd016f5e093419f0bfd9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 Sep 2017 10:05:57 -0700 Subject: [PATCH 122/279] tcp: fastopen: fix on syn-data transmit failure Our recent change exposed a bug in TCP Fastopen Client that syzkaller found right away [1] When we prepare skb with SYN+DATA, we attempt to transmit it, and we update socket state as if the transmit was a success. In socket RTX queue we have two skbs, one with the SYN alone, and a second one containing the DATA. When (malicious) ACK comes in, we now complain that second one had no skb_mstamp. The proper fix is to make sure that if the transmit failed, we do not pretend we sent the DATA skb, and make it our send_head. When 3WHS completes, we can now send the DATA right away, without having to wait for a timeout. [1] WARNING: CPU: 0 PID: 100189 at net/ipv4/tcp_input.c:3117 tcp_clean_rtx_queue+0x2057/0x2ab0 net/ipv4/tcp_input.c:3117() WARN_ON_ONCE(last_ackt == 0); Modules linked in: CPU: 0 PID: 100189 Comm: syz-executor1 Not tainted Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 0000000000000000 ffff8800b35cb1d8 ffffffff81cad00d 0000000000000000 ffffffff828a4347 ffff88009f86c080 ffffffff8316eb20 0000000000000d7f ffff8800b35cb220 ffffffff812c33c2 ffff8800baad2440 00000009d46575c0 Call Trace: [] __dump_stack [] dump_stack+0xc1/0x124 [] warn_slowpath_common+0xe2/0x150 [] warn_slowpath_null+0x2e/0x40 [] tcp_clean_rtx_queue+0x2057/0x2ab0 n [] tcp_ack+0x151d/0x3930 [] tcp_rcv_state_process+0x1c69/0x4fd0 [] tcp_v4_do_rcv+0x54f/0x7c0 [] sk_backlog_rcv [] __release_sock+0x12b/0x3a0 [] release_sock+0x5e/0x1c0 [] inet_wait_for_connect [] __inet_stream_connect+0x545/0xc50 [] tcp_sendmsg_fastopen [] tcp_sendmsg+0x2298/0x35a0 [] inet_sendmsg+0xe5/0x520 [] sock_sendmsg_nosec [] sock_sendmsg+0xcf/0x110 Fixes: 8c72c65b426b ("tcp: update skb->skb_mstamp more carefully") Fixes: 783237e8daf1 ("net-tcp: Fast Open client - sending SYN-data") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Cc: Neal Cardwell Cc: Yuchung Cheng Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 517d737059d1..0bc9e46a5369 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3389,6 +3389,10 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) goto done; } + /* data was not sent, this is our new send_head */ + sk->sk_send_head = syn_data; + tp->packets_out -= tcp_skb_pcount(syn_data); + fallback: /* Send a regular SYN with Fast Open cookie request option */ if (fo->cookie.len > 0) @@ -3441,6 +3445,11 @@ int tcp_connect(struct sock *sk) */ tp->snd_nxt = tp->write_seq; tp->pushed_seq = tp->write_seq; + buff = tcp_send_head(sk); + if (unlikely(buff)) { + tp->snd_nxt = TCP_SKB_CB(buff)->seq; + tp->pushed_seq = TCP_SKB_CB(buff)->seq; + } TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS); /* Timer for repeating the SYN until an answer. */ From f55956065ec94e3e9371463d693a1029c4cc3007 Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Tue, 19 Sep 2017 19:35:18 +0200 Subject: [PATCH 123/279] net: emac: Fix napi poll list corruption This patch is pretty much a carbon copy of commit 3079c652141f ("caif: Fix napi poll list corruption") with "caif" replaced by "emac". The commit d75b1ade567f ("net: less interrupt masking in NAPI") breaks emac. It is now required that if the entire budget is consumed when poll returns, the napi poll_list must remain empty. However, like some other drivers emac tries to do a last-ditch check and if there is more work it will call napi_reschedule and then immediately process some of this new work. Should the entire budget be consumed while processing such new work then we will violate the new caller contract. This patch fixes this by not touching any work when we reschedule in emac. Signed-off-by: Christian Lamparter Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/emac/mal.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ibm/emac/mal.c b/drivers/net/ethernet/ibm/emac/mal.c index 2c74baa2398a..fff09dcf9e34 100644 --- a/drivers/net/ethernet/ibm/emac/mal.c +++ b/drivers/net/ethernet/ibm/emac/mal.c @@ -402,7 +402,7 @@ static int mal_poll(struct napi_struct *napi, int budget) unsigned long flags; MAL_DBG2(mal, "poll(%d)" NL, budget); - again: + /* Process TX skbs */ list_for_each(l, &mal->poll_list) { struct mal_commac *mc = @@ -451,7 +451,6 @@ static int mal_poll(struct napi_struct *napi, int budget) spin_lock_irqsave(&mal->lock, flags); mal_disable_eob_irq(mal); spin_unlock_irqrestore(&mal->lock, flags); - goto again; } mc->ops->poll_tx(mc->dev); } From 7c30013133964aaa2f45c17d6e9782ac6cfd7f5f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 20 Sep 2017 00:44:21 +0200 Subject: [PATCH 124/279] bpf: fix ri->map_owner pointer on bpf_prog_realloc Commit 109980b894e9 ("bpf: don't select potentially stale ri->map from buggy xdp progs") passed the pointer to the prog itself to be loaded into r4 prior on bpf_redirect_map() helper call, so that we can store the owner into ri->map_owner out of the helper. Issue with that is that the actual address of the prog is still subject to change when subsequent rewrites occur that require slow path in bpf_prog_realloc() to alloc more memory, e.g. from patching inlining helper functions or constant blinding. Thus, we really need to take prog->aux as the address we're holding, which also works with prog clones as they share the same aux object. Instead of then fetching aux->prog during runtime, which could potentially incur cache misses due to false sharing, we are going to just use aux for comparison on the map owner. This will also keep the patchlet of the same size, and later check in xdp_map_invalid() only accesses read-only aux pointer from the prog, it's also in the same cacheline already from prior access when calling bpf_func. Fixes: 109980b894e9 ("bpf: don't select potentially stale ri->map from buggy xdp progs") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 7 ++++++- net/core/filter.c | 24 +++++++++++++++--------- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 799b2451ef2d..b914fbe1383e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4205,7 +4205,12 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) } if (insn->imm == BPF_FUNC_redirect_map) { - u64 addr = (unsigned long)prog; + /* Note, we cannot use prog directly as imm as subsequent + * rewrites would still change the prog pointer. The only + * stable address we can use is aux, which also works with + * prog clones during blinding. + */ + u64 addr = (unsigned long)prog->aux; struct bpf_insn r4_ld[] = { BPF_LD_IMM64(BPF_REG_4, addr), *insn, diff --git a/net/core/filter.c b/net/core/filter.c index 24dd33dd9f04..82edad58d066 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1794,7 +1794,7 @@ struct redirect_info { u32 flags; struct bpf_map *map; struct bpf_map *map_to_flush; - const struct bpf_prog *map_owner; + unsigned long map_owner; }; static DEFINE_PER_CPU(struct redirect_info, redirect_info); @@ -2500,11 +2500,17 @@ void xdp_do_flush_map(void) } EXPORT_SYMBOL_GPL(xdp_do_flush_map); +static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog, + unsigned long aux) +{ + return (unsigned long)xdp_prog->aux != aux; +} + static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp, struct bpf_prog *xdp_prog) { struct redirect_info *ri = this_cpu_ptr(&redirect_info); - const struct bpf_prog *map_owner = ri->map_owner; + unsigned long map_owner = ri->map_owner; struct bpf_map *map = ri->map; struct net_device *fwd = NULL; u32 index = ri->ifindex; @@ -2512,9 +2518,9 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp, ri->ifindex = 0; ri->map = NULL; - ri->map_owner = NULL; + ri->map_owner = 0; - if (unlikely(map_owner != xdp_prog)) { + if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) { err = -EFAULT; map = NULL; goto err; @@ -2574,7 +2580,7 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, struct bpf_prog *xdp_prog) { struct redirect_info *ri = this_cpu_ptr(&redirect_info); - const struct bpf_prog *map_owner = ri->map_owner; + unsigned long map_owner = ri->map_owner; struct bpf_map *map = ri->map; struct net_device *fwd = NULL; u32 index = ri->ifindex; @@ -2583,10 +2589,10 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, ri->ifindex = 0; ri->map = NULL; - ri->map_owner = NULL; + ri->map_owner = 0; if (map) { - if (unlikely(map_owner != xdp_prog)) { + if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) { err = -EFAULT; map = NULL; goto err; @@ -2632,7 +2638,7 @@ BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags) ri->ifindex = ifindex; ri->flags = flags; ri->map = NULL; - ri->map_owner = NULL; + ri->map_owner = 0; return XDP_REDIRECT; } @@ -2646,7 +2652,7 @@ static const struct bpf_func_proto bpf_xdp_redirect_proto = { }; BPF_CALL_4(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags, - const struct bpf_prog *, map_owner) + unsigned long, map_owner) { struct redirect_info *ri = this_cpu_ptr(&redirect_info); From 6819a14ecbe2e089e5c5bb74edecafdde2028a00 Mon Sep 17 00:00:00 2001 From: Mike Manning Date: Mon, 4 Sep 2017 15:52:55 +0100 Subject: [PATCH 125/279] net: ipv6: fix regression of no RTM_DELADDR sent after DAD failure Commit f784ad3d79e5 ("ipv6: do not send RTM_DELADDR for tentative addresses") incorrectly assumes that no RTM_NEWADDR are sent for addresses in tentative state, as this does happen for the standard IPv6 use-case of DAD failure, see the call to ipv6_ifa_notify() in addconf_dad_stop(). So as a result of this change, no RTM_DELADDR is sent after DAD failure for a link-local when strict DAD (accept_dad=2) is configured, or on the next admin down in other cases. The absence of this notification breaks backwards compatibility and causes problems after DAD failure if this notification was being relied on. The solution is to allow RTM_DELADDR to still be sent after DAD failure. Fixes: f784ad3d79e5 ("ipv6: do not send RTM_DELADDR for tentative addresses") Signed-off-by: Mike Manning Cc: Mahesh Bandewar Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c2e2a78787ec..d7dbcc8eda10 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4940,9 +4940,10 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) /* Don't send DELADDR notification for TENTATIVE address, * since NEWADDR notification is sent only after removing - * TENTATIVE flag. + * TENTATIVE flag, if DAD has not failed. */ - if (ifa->flags & IFA_F_TENTATIVE && event == RTM_DELADDR) + if (ifa->flags & IFA_F_TENTATIVE && !(ifa->flags & IFA_F_DADFAILED) && + event == RTM_DELADDR) return; skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC); From 35e015e1f5773417952fe91ce8790baf9b4237a2 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Tue, 12 Sep 2017 17:46:37 +0200 Subject: [PATCH 126/279] ipv6: fix net.ipv6.conf.all interface DAD handlers Currently, writing into net.ipv6.conf.all.{accept_dad,use_optimistic,optimistic_dad} has no effect. Fix handling of these flags by: - using the maximum of global and per-interface values for the accept_dad flag. That is, if at least one of the two values is non-zero, enable DAD on the interface. If at least one value is set to 2, enable DAD and disable IPv6 operation on the interface if MAC-based link-local address was found - using the logical OR of global and per-interface values for the optimistic_dad flag. If at least one of them is set to one, optimistic duplicate address detection (RFC 4429) is enabled on the interface - using the logical OR of global and per-interface values for the use_optimistic flag. If at least one of them is set to one, optimistic addresses won't be marked as deprecated during source address selection on the interface. While at it, as we're modifying the prototype for ipv6_use_optimistic_addr(), drop inline, and let the compiler decide. Fixes: 7fd2561e4ebd ("net: ipv6: Add a sysctl to make optimistic addresses useful candidates") Signed-off-by: Matteo Croce Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 18 +++++++++++++---- net/ipv6/addrconf.c | 27 +++++++++++++++++++------- 2 files changed, 34 insertions(+), 11 deletions(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index b3345d0fe0a6..77f4de59dc9c 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1680,6 +1680,9 @@ accept_dad - INTEGER 2: Enable DAD, and disable IPv6 operation if MAC-based duplicate link-local address has been found. + DAD operation and mode on a given interface will be selected according + to the maximum value of conf/{all,interface}/accept_dad. + force_tllao - BOOLEAN Enable sending the target link-layer address option even when responding to a unicast neighbor solicitation. @@ -1727,16 +1730,23 @@ suppress_frag_ndisc - INTEGER optimistic_dad - BOOLEAN Whether to perform Optimistic Duplicate Address Detection (RFC 4429). - 0: disabled (default) - 1: enabled + 0: disabled (default) + 1: enabled + + Optimistic Duplicate Address Detection for the interface will be enabled + if at least one of conf/{all,interface}/optimistic_dad is set to 1, + it will be disabled otherwise. use_optimistic - BOOLEAN If enabled, do not classify optimistic addresses as deprecated during source address selection. Preferred addresses will still be chosen before optimistic addresses, subject to other ranking in the source address selection algorithm. - 0: disabled (default) - 1: enabled + 0: disabled (default) + 1: enabled + + This will be enabled if at least one of + conf/{all,interface}/use_optimistic is set to 1, disabled otherwise. stable_secret - IPv6 address This IPv6 address will be used as a secret to generate IPv6 diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d7dbcc8eda10..96861c702c06 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1399,10 +1399,18 @@ static inline int ipv6_saddr_preferred(int type) return 0; } -static inline bool ipv6_use_optimistic_addr(struct inet6_dev *idev) +static bool ipv6_use_optimistic_addr(struct net *net, + struct inet6_dev *idev) { #ifdef CONFIG_IPV6_OPTIMISTIC_DAD - return idev && idev->cnf.optimistic_dad && idev->cnf.use_optimistic; + if (!idev) + return false; + if (!net->ipv6.devconf_all->optimistic_dad && !idev->cnf.optimistic_dad) + return false; + if (!net->ipv6.devconf_all->use_optimistic && !idev->cnf.use_optimistic) + return false; + + return true; #else return false; #endif @@ -1472,7 +1480,7 @@ static int ipv6_get_saddr_eval(struct net *net, /* Rule 3: Avoid deprecated and optimistic addresses */ u8 avoid = IFA_F_DEPRECATED; - if (!ipv6_use_optimistic_addr(score->ifa->idev)) + if (!ipv6_use_optimistic_addr(net, score->ifa->idev)) avoid |= IFA_F_OPTIMISTIC; ret = ipv6_saddr_preferred(score->addr_type) || !(score->ifa->flags & avoid); @@ -2460,7 +2468,8 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, int max_addresses = in6_dev->cnf.max_addresses; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD - if (in6_dev->cnf.optimistic_dad && + if ((net->ipv6.devconf_all->optimistic_dad || + in6_dev->cnf.optimistic_dad) && !net->ipv6.devconf_all->forwarding && sllao) addr_flags |= IFA_F_OPTIMISTIC; #endif @@ -3051,7 +3060,8 @@ void addrconf_add_linklocal(struct inet6_dev *idev, u32 addr_flags = flags | IFA_F_PERMANENT; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD - if (idev->cnf.optimistic_dad && + if ((dev_net(idev->dev)->ipv6.devconf_all->optimistic_dad || + idev->cnf.optimistic_dad) && !dev_net(idev->dev)->ipv6.devconf_all->forwarding) addr_flags |= IFA_F_OPTIMISTIC; #endif @@ -3810,6 +3820,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) goto out; if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || + dev_net(dev)->ipv6.devconf_all->accept_dad < 1 || idev->cnf.accept_dad < 1 || !(ifp->flags&IFA_F_TENTATIVE) || ifp->flags & IFA_F_NODAD) { @@ -3841,7 +3852,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) */ if (ifp->flags & IFA_F_OPTIMISTIC) { ip6_ins_rt(ifp->rt); - if (ipv6_use_optimistic_addr(idev)) { + if (ipv6_use_optimistic_addr(dev_net(dev), idev)) { /* Because optimistic nodes can use this address, * notify listeners. If DAD fails, RTM_DELADDR is sent. */ @@ -3897,7 +3908,9 @@ static void addrconf_dad_work(struct work_struct *w) action = DAD_ABORT; ifp->state = INET6_IFADDR_STATE_POSTDAD; - if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6 && + if ((dev_net(idev->dev)->ipv6.devconf_all->accept_dad > 1 || + idev->cnf.accept_dad > 1) && + !idev->cnf.disable_ipv6 && !(ifp->flags & IFA_F_STABLE_PRIVACY)) { struct in6_addr addr; From 23586b66d84ba3184b8820277f3fc42761640f87 Mon Sep 17 00:00:00 2001 From: Steve French Date: Mon, 18 Sep 2017 18:18:45 -0500 Subject: [PATCH 127/279] Fix SMB3.1.1 guest authentication to Samba Samba rejects SMB3.1.1 dialect (vers=3.1.1) negotiate requests from the kernel client due to the two byte pad at the end of the negotiate contexts. CC: Stable Signed-off-by: Steve French Reviewed-by: Ronnie Sahlberg --- fs/cifs/smb2pdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 5c16591a128e..b0eaebe627e9 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -439,7 +439,7 @@ assemble_neg_contexts(struct smb2_negotiate_req *req) build_encrypt_ctxt((struct smb2_encryption_neg_context *)pneg_ctxt); req->NegotiateContextOffset = cpu_to_le32(OFFSET_OF_NEG_CONTEXT); req->NegotiateContextCount = cpu_to_le16(2); - inc_rfc1001_len(req, 4 + sizeof(struct smb2_preauth_neg_context) + 2 + inc_rfc1001_len(req, 4 + sizeof(struct smb2_preauth_neg_context) + sizeof(struct smb2_encryption_neg_context)); /* calculate hash */ } #else From 04fc52fb222d35e1f7a0d5d85b19a676ea1e10e8 Mon Sep 17 00:00:00 2001 From: Maciej Purski Date: Tue, 5 Sep 2017 14:23:02 +0200 Subject: [PATCH 128/279] drm/exynos/hdmi: Fix unsafe list iteration Function hdmi_mode_fixup() used bare list_for_each entry, which was unsafe and caused memory corruption detected by kasan. It now uses drm_for_each_connector_iter macro, which is now recommended by the documentation and safe. Signed-off-by: Maciej Purski Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_hdmi.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c index 214fa5e51963..0109ff40b1db 100644 --- a/drivers/gpu/drm/exynos/exynos_hdmi.c +++ b/drivers/gpu/drm/exynos/exynos_hdmi.c @@ -944,22 +944,27 @@ static bool hdmi_mode_fixup(struct drm_encoder *encoder, struct drm_device *dev = encoder->dev; struct drm_connector *connector; struct drm_display_mode *m; + struct drm_connector_list_iter conn_iter; int mode_ok; drm_mode_set_crtcinfo(adjusted_mode, 0); - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + drm_connector_list_iter_begin(dev, &conn_iter); + drm_for_each_connector_iter(connector, &conn_iter) { if (connector->encoder == encoder) break; } + if (connector) + drm_connector_get(connector); + drm_connector_list_iter_end(&conn_iter); - if (connector->encoder != encoder) + if (!connector) return true; mode_ok = hdmi_mode_valid(connector, adjusted_mode); if (mode_ok == MODE_OK) - return true; + goto cleanup; /* * Find the most suitable mode and copy it to adjusted_mode. @@ -979,6 +984,9 @@ static bool hdmi_mode_fixup(struct drm_encoder *encoder, } } +cleanup: + drm_connector_put(connector); + return true; } From 8632ec8cdcaead67c019f1ba36760d1bdd0c5d23 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 23 Aug 2017 15:37:53 +1000 Subject: [PATCH 129/279] powerpc/configs: Update for CONFIG_SND changes Commit eb3b705aaed9 ("ALSA: Make CONFIG_SND_OSSEMUL user-selectable") means we need to set CONFIG_SND_OSSEMUL in our configs, otherwise we lose some of the SND symbols. And commit 0181307abc1d ("ALSA: seq: Reorganize kconfig and build") reorganised things, which causes the churn. Signed-off-by: Michael Ellerman --- arch/powerpc/configs/g5_defconfig | 5 +++-- arch/powerpc/configs/gamecube_defconfig | 5 +++-- arch/powerpc/configs/pasemi_defconfig | 3 ++- arch/powerpc/configs/pmac32_defconfig | 7 ++++--- arch/powerpc/configs/ppc64_defconfig | 7 ++++--- arch/powerpc/configs/ppc64e_defconfig | 7 ++++--- arch/powerpc/configs/ppc6xx_defconfig | 7 ++++--- arch/powerpc/configs/wii_defconfig | 5 +++-- 8 files changed, 27 insertions(+), 19 deletions(-) diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig index e084fa548d73..063817fee61c 100644 --- a/arch/powerpc/configs/g5_defconfig +++ b/arch/powerpc/configs/g5_defconfig @@ -138,10 +138,11 @@ CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_SOUND=m CONFIG_SND=m -CONFIG_SND_SEQUENCER=m +CONFIG_SND_OSSEMUL=y CONFIG_SND_MIXER_OSS=m CONFIG_SND_PCM_OSS=m -CONFIG_SND_SEQUENCER_OSS=y +CONFIG_SND_SEQUENCER=m +CONFIG_SND_SEQUENCER_OSS=m CONFIG_SND_POWERMAC=m CONFIG_SND_AOA=m CONFIG_SND_AOA_FABRIC_LAYOUT=m diff --git a/arch/powerpc/configs/gamecube_defconfig b/arch/powerpc/configs/gamecube_defconfig index 79bbc8238b32..805b0f87653c 100644 --- a/arch/powerpc/configs/gamecube_defconfig +++ b/arch/powerpc/configs/gamecube_defconfig @@ -64,11 +64,12 @@ CONFIG_LOGO=y # CONFIG_LOGO_LINUX_CLUT224 is not set CONFIG_SOUND=y CONFIG_SND=y -CONFIG_SND_SEQUENCER=y +CONFIG_SND_OSSEMUL=y CONFIG_SND_MIXER_OSS=y CONFIG_SND_PCM_OSS=y -CONFIG_SND_SEQUENCER_OSS=y # CONFIG_SND_VERBOSE_PROCFS is not set +CONFIG_SND_SEQUENCER=y +CONFIG_SND_SEQUENCER_OSS=y # CONFIG_USB_SUPPORT is not set CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_GENERIC=y diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig index 8cf4a46bef86..6daa56f8895c 100644 --- a/arch/powerpc/configs/pasemi_defconfig +++ b/arch/powerpc/configs/pasemi_defconfig @@ -115,9 +115,10 @@ CONFIG_VGACON_SOFT_SCROLLBACK=y CONFIG_LOGO=y CONFIG_SOUND=y CONFIG_SND=y -CONFIG_SND_SEQUENCER=y +CONFIG_SND_OSSEMUL=y CONFIG_SND_MIXER_OSS=y CONFIG_SND_PCM_OSS=y +CONFIG_SND_SEQUENCER=y CONFIG_SND_SEQUENCER_OSS=y CONFIG_SND_USB_AUDIO=y CONFIG_SND_USB_USX2Y=y diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig index 8e798b1fbc99..1aab9a62a681 100644 --- a/arch/powerpc/configs/pmac32_defconfig +++ b/arch/powerpc/configs/pmac32_defconfig @@ -227,11 +227,12 @@ CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_SOUND=m CONFIG_SND=m -CONFIG_SND_SEQUENCER=m -CONFIG_SND_SEQ_DUMMY=m +CONFIG_SND_OSSEMUL=y CONFIG_SND_MIXER_OSS=m CONFIG_SND_PCM_OSS=m -CONFIG_SND_SEQUENCER_OSS=y +CONFIG_SND_SEQUENCER=m +CONFIG_SND_SEQ_DUMMY=m +CONFIG_SND_SEQUENCER_OSS=m CONFIG_SND_DUMMY=m CONFIG_SND_POWERMAC=m CONFIG_SND_AOA=m diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 791db775a09c..6ddca80c52c3 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -222,11 +222,12 @@ CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_SOUND=m CONFIG_SND=m -CONFIG_SND_SEQUENCER=m -CONFIG_SND_SEQ_DUMMY=m +CONFIG_SND_OSSEMUL=y CONFIG_SND_MIXER_OSS=m CONFIG_SND_PCM_OSS=m -CONFIG_SND_SEQUENCER_OSS=y +CONFIG_SND_SEQUENCER=m +CONFIG_SND_SEQ_DUMMY=m +CONFIG_SND_SEQUENCER_OSS=m CONFIG_SND_POWERMAC=m CONFIG_SND_AOA=m CONFIG_SND_AOA_FABRIC_LAYOUT=m diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig index d0fe0f8f77c2..41d85cb3c9a2 100644 --- a/arch/powerpc/configs/ppc64e_defconfig +++ b/arch/powerpc/configs/ppc64e_defconfig @@ -141,11 +141,12 @@ CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_SOUND=m CONFIG_SND=m -CONFIG_SND_SEQUENCER=m -CONFIG_SND_SEQ_DUMMY=m +CONFIG_SND_OSSEMUL=y CONFIG_SND_MIXER_OSS=m CONFIG_SND_PCM_OSS=m -CONFIG_SND_SEQUENCER_OSS=y +CONFIG_SND_SEQUENCER=m +CONFIG_SND_SEQ_DUMMY=m +CONFIG_SND_SEQUENCER_OSS=m CONFIG_HID_DRAGONRISE=y CONFIG_HID_GYRATION=y CONFIG_HID_TWINHAN=y diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index ae6eba482d75..da0e8d535eb8 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -789,17 +789,18 @@ CONFIG_LOGO=y # CONFIG_LOGO_LINUX_VGA16 is not set CONFIG_SOUND=m CONFIG_SND=m -CONFIG_SND_SEQUENCER=m -CONFIG_SND_SEQ_DUMMY=m +CONFIG_SND_OSSEMUL=y CONFIG_SND_MIXER_OSS=m CONFIG_SND_PCM_OSS=m -CONFIG_SND_SEQUENCER_OSS=y CONFIG_SND_DYNAMIC_MINORS=y # CONFIG_SND_SUPPORT_OLD_API is not set CONFIG_SND_VERBOSE_PRINTK=y CONFIG_SND_DEBUG=y CONFIG_SND_DEBUG_VERBOSE=y CONFIG_SND_PCM_XRUN_DEBUG=y +CONFIG_SND_SEQUENCER=m +CONFIG_SND_SEQ_DUMMY=m +CONFIG_SND_SEQUENCER_OSS=m CONFIG_SND_DUMMY=m CONFIG_SND_VIRMIDI=m CONFIG_SND_MTPAV=m diff --git a/arch/powerpc/configs/wii_defconfig b/arch/powerpc/configs/wii_defconfig index aef41b17a8bc..9c7400a19e9d 100644 --- a/arch/powerpc/configs/wii_defconfig +++ b/arch/powerpc/configs/wii_defconfig @@ -79,11 +79,12 @@ CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_SOUND=y CONFIG_SND=y -CONFIG_SND_SEQUENCER=y +CONFIG_SND_OSSEMUL=y CONFIG_SND_MIXER_OSS=y CONFIG_SND_PCM_OSS=y -CONFIG_SND_SEQUENCER_OSS=y # CONFIG_SND_VERBOSE_PROCFS is not set +CONFIG_SND_SEQUENCER=y +CONFIG_SND_SEQUENCER_OSS=y CONFIG_HID_APPLE=m CONFIG_HID_WACOM=m CONFIG_MMC=y From 4917fcb58cc73f6b81455e3c5f960144809ddf1a Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 19 Sep 2017 11:47:06 +0530 Subject: [PATCH 130/279] powerpc/sysrq: Fix oops whem ppmu is not registered Kernel crashes if power pmu is not registered and user tries to dump regs with 'echo p > /proc/sysrq-trigger'. Sample log: Unable to handle kernel paging request for data at address 0x00000008 Faulting instruction address: 0xc0000000000d52f0 NIP [c0000000000d52f0] perf_event_print_debug+0x10/0x230 LR [c00000000058a938] sysrq_handle_showregs+0x38/0x50 Call Trace: printk+0x38/0x4c (unreliable) __handle_sysrq+0xe4/0x270 write_sysrq_trigger+0x64/0x80 proc_reg_write+0x80/0xd0 __vfs_write+0x40/0x200 vfs_write+0xc8/0x240 SyS_write+0x60/0x110 system_call+0x58/0x6c Fixes: 5f6d0380c640 ("powerpc/perf: Define perf_event_print_debug() to print PMU register values") Signed-off-by: Ravi Bangoria Reviewed-by: Kamalesh Babulal Signed-off-by: Michael Ellerman --- arch/powerpc/perf/core-book3s.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 2e3eb7431571..9e3da168d54c 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -793,6 +793,11 @@ void perf_event_print_debug(void) u32 pmcs[MAX_HWEVENTS]; int i; + if (!ppmu) { + pr_info("Performance monitor hardware not registered.\n"); + return; + } + if (!ppmu->n_counter) return; From c1fa0768a8713b135848f78fd43ffc208d8ded70 Mon Sep 17 00:00:00 2001 From: Gustavo Romero Date: Wed, 13 Sep 2017 22:13:48 -0400 Subject: [PATCH 131/279] powerpc/tm: Flush TM only if CPU has TM feature Commit cd63f3c ("powerpc/tm: Fix saving of TM SPRs in core dump") added code to access TM SPRs in flush_tmregs_to_thread(). However flush_tmregs_to_thread() does not check if TM feature is available on CPU before trying to access TM SPRs in order to copy live state to thread structures. flush_tmregs_to_thread() is indeed guarded by CONFIG_PPC_TRANSACTIONAL_MEM but it might be the case that kernel was compiled with CONFIG_PPC_TRANSACTIONAL_MEM enabled and ran on a CPU without TM feature available, thus rendering the execution of TM instructions that are treated by the CPU as illegal instructions. The fix is just to add proper checking in flush_tmregs_to_thread() if CPU has the TM feature before accessing any TM-specific resource, returning immediately if TM is no available on the CPU. Adding that checking in flush_tmregs_to_thread() instead of in places where it is called, like in vsr_get() and vsr_set(), is better because avoids the same problem cropping up elsewhere. Cc: stable@vger.kernel.org # v4.13+ Fixes: cd63f3c ("powerpc/tm: Fix saving of TM SPRs in core dump") Signed-off-by: Gustavo Romero Reviewed-by: Cyril Bur Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 07cd22e35405..f52ad5bb7109 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -131,7 +131,7 @@ static void flush_tmregs_to_thread(struct task_struct *tsk) * in the appropriate thread structures from live. */ - if (tsk != current) + if ((!cpu_has_feature(CPU_FTR_TM)) || (tsk != current)) return; if (MSR_TM_SUSPENDED(mfmsr())) { From ad47ff3e33503e0969db2d4f9a40942aa6414598 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 19 Sep 2017 20:45:52 +1000 Subject: [PATCH 132/279] powerpc/sstep: Fix issues with set_cr0() set_cr0() broke when we changed analyse_instr() to not modify the register state. Instead of looking at regs->gpr[x] which has not been updated yet, we need to look at op->val. Fixes: 3cdfcbfd32b9 ("powerpc: Change analyse_instr so it doesn't modify *regs") Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/lib/sstep.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index fb9f58b868e7..9d72e5900320 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -944,9 +944,9 @@ NOKPROBE_SYMBOL(emulate_dcbz); : "r" (addr), "i" (-EFAULT), "0" (err)) static nokprobe_inline void set_cr0(const struct pt_regs *regs, - struct instruction_op *op, int rd) + struct instruction_op *op) { - long val = regs->gpr[rd]; + long val = op->val; op->type |= SETCC; op->ccval = (regs->ccr & 0x0fffffff) | ((regs->xer >> 3) & 0x10000000); @@ -1326,7 +1326,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 13: /* addic. */ imm = (short) instr; add_with_carry(regs, op, rd, regs->gpr[ra], imm, 0); - set_cr0(regs, op, rd); + set_cr0(regs, op); return 1; case 14: /* addi */ @@ -1397,13 +1397,13 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 28: /* andi. */ op->val = regs->gpr[rd] & (unsigned short) instr; - set_cr0(regs, op, ra); + set_cr0(regs, op); goto logical_done_nocc; case 29: /* andis. */ imm = (unsigned short) instr; op->val = regs->gpr[rd] & (imm << 16); - set_cr0(regs, op, ra); + set_cr0(regs, op); goto logical_done_nocc; #ifdef __powerpc64__ @@ -2526,7 +2526,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, logical_done: if (instr & 1) - set_cr0(regs, op, ra); + set_cr0(regs, op); logical_done_nocc: op->reg = ra; op->type |= SETREG; @@ -2534,7 +2534,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, arith_done: if (instr & 1) - set_cr0(regs, op, rd); + set_cr0(regs, op); compute_done: op->reg = rd; op->type |= SETREG; From 5bcaa4cc41923871777c3d13906267e812775094 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 19 Sep 2017 20:45:53 +1000 Subject: [PATCH 133/279] powerpc/sstep: Fix issues with mcrf mcrf broke when we changed analyse_instr() to not modify the register state. The instruction writes to the CR, so we need to store the result in op->ccval, not op->val. Fixes: 3cdfcbfd32b9 ("powerpc: Change analyse_instr so it doesn't modify *regs") Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/lib/sstep.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 9d72e5900320..c4cda1afb49d 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1513,10 +1513,10 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, op->type = COMPUTE + SETCC; imm = 0xf0000000UL; val = regs->gpr[rd]; - op->val = regs->ccr; + op->ccval = regs->ccr; for (sh = 0; sh < 8; ++sh) { if (instr & (0x80000 >> sh)) - op->val = (op->val & ~imm) | + op->ccval = (op->ccval & ~imm) | (val & imm); imm >>= 4; } From 1575fe06f6b1d156ed31fb22c8631d49d79751d8 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 20 Sep 2017 09:32:19 +1000 Subject: [PATCH 134/279] powerpc/sstep: mullw should calculate a 64 bit signed result mullw should do a 32 bit signed multiply and create a 64 bit signed result. It currently truncates the result to 32 bits. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/lib/sstep.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index c4cda1afb49d..5e8418c28bd8 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1651,8 +1651,9 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, goto arith_done; case 235: /* mullw */ - op->val = (unsigned int) regs->gpr[ra] * - (unsigned int) regs->gpr[rb]; + op->val = (long)(int) regs->gpr[ra] * + (int) regs->gpr[rb]; + goto arith_done; case 266: /* add */ From 5d298baa41883fc421acfd932799c0f4177249ae Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Thu, 31 Aug 2017 17:17:41 +0530 Subject: [PATCH 135/279] powerpc/powernv: Clear LPCR[PECE1] via stop-api only for deep state offline Commit 24be85a23d1f ("powerpc/powernv: Clear PECE1 in LPCR via stop-api only on Hotplug") clears the PECE1 bit of the LPCR via stop-api during CPU-Hotplug to prevent wakeup due to a decrementer on an offlined CPU which is in a deep stop state. In the case where the stop-api support is found to be lacking, the commit 785a12afdb4a ("powerpc/powernv/idle: Disable LOSE_FULL_CONTEXT states when stop-api fails") disables deep states that lose hypervisor context. Thus in this case, the offlined CPU will be put to some shallow idle state. However, we currently unconditionally clear the PECE1 in LPCR via stop-api during CPU-Hotplug even when deep states are disabled due to stop-api failure. Fix this by clearing PECE1 of LPCR via stop-api during CPU-Hotplug *only* when the offlined CPU will be put to a deep state that loses hypervisor context. Fixes: 24be85a23d1f ("powerpc/powernv: Clear PECE1 in LPCR via stop-api only on Hotplug") Reported-by: Pavithra Prakash Signed-off-by: Gautham R. Shenoy Reviewed-by: Nicholas Piggin Tested-by: Pavithra Prakash Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/idle.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 9f59041a172b..443d5ca71995 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -393,7 +393,13 @@ static void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val) u64 pir = get_hard_smp_processor_id(cpu); mtspr(SPRN_LPCR, lpcr_val); - opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val); + + /* + * Program the LPCR via stop-api only if the deepest stop state + * can lose hypervisor context. + */ + if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) + opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val); } /* From 590d08d3da45e9fed423b08ab38d71886c07abc8 Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 19 Sep 2017 11:43:47 -0500 Subject: [PATCH 136/279] SMB3: Fix endian warning Multi-dialect negotiate patch had a minor endian error. Signed-off-by: Steve French Reviewed-by: Ronnie Sahlberg CC: Stable # 4.13+ --- fs/cifs/smb2pdu.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index b0eaebe627e9..b4c58a1db1ae 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -570,10 +570,11 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) /* ops set to 3.0 by default for default so update */ ses->server->ops = &smb21_operations; } - } else if (rsp->DialectRevision != ses->server->vals->protocol_id) { + } else if (le16_to_cpu(rsp->DialectRevision) != + ses->server->vals->protocol_id) { /* if requested single dialect ensure returned dialect matched */ cifs_dbg(VFS, "Illegal 0x%x dialect returned: not requested\n", - cpu_to_le16(rsp->DialectRevision)); + le16_to_cpu(rsp->DialectRevision)); return -EIO; } From c721c38957fb19982416f6be71aae7b30630d83b Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 19 Sep 2017 18:40:03 -0500 Subject: [PATCH 137/279] SMB3: Warn user if trying to sign connection that authenticated as guest It can be confusing if user ends up authenticated as guest but they requested signing (server will return error validating signed packets) so add log message for this. Signed-off-by: Steve French Reviewed-by: Ronnie Sahlberg CC: Stable --- fs/cifs/smb2pdu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index b4c58a1db1ae..d499ce265c3b 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1176,6 +1176,8 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, while (sess_data->func) sess_data->func(sess_data); + if ((ses->session_flags & SMB2_SESSION_FLAG_IS_GUEST) && (ses->sign)) + cifs_dbg(VFS, "signing requested but authenticated as guest\n"); rc = sess_data->result; out: kfree(sess_data); From 6e82e929d98552ed5156919e0f532eeba2da704b Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Wed, 20 Sep 2017 13:09:23 +1000 Subject: [PATCH 138/279] cifs: show 'soft' in the mount options for hard mounts Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 180b3356ff86..a864e6575309 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -461,6 +461,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root) seq_puts(s, ",nocase"); if (tcon->retry) seq_puts(s, ",hard"); + else + seq_puts(s, ",soft"); if (tcon->use_persistent) seq_puts(s, ",persistenthandles"); else if (tcon->use_resilient) From fd0b19ed5389187829b854900511c9195875bb42 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 19 Sep 2017 22:07:18 -0700 Subject: [PATCH 139/279] MIPS: Fix perf event init Commit c311c797998c ("cpumask: make "nr_cpumask_bits" unsigned") modified mipspmu_event_init() to cast the struct perf_event cpu field to an unsigned integer before it is compared with nr_cpumask_bits (and *ahem* did so without copying the linux-mips mailing list or any MIPS developers...). This is broken because the cpu field may be -1 for events which follow a process rather than being affine to a particular CPU. When this is the case the cast to an unsigned int results in a value equal to ULONG_MAX, which is always greater than nr_cpumask_bits so we always fail mipspmu_event_init() and return -ENODEV. The check against nr_cpumask_bits seems nonsensical anyway, so this patch simply removes it. The cpu field is going to either be -1 or a valid CPU number. Comparing it with nr_cpumask_bits is effectively checking that it's a valid cpu number, but it seems safe to rely on the core perf events code to ensure that's the case. The end result is that this fixes use of perf on MIPS when not constraining events to a particular CPU, and fixes the "perf list hw" command which fails to list any events without this. Signed-off-by: Paul Burton Fixes: c311c797998c ("cpumask: make "nr_cpumask_bits" unsigned") Cc: Alexey Dobriyan Cc: Andrew Morton Cc: linux-mips@linux-mips.org Cc: stable # v4.12+ Patchwork: https://patchwork.linux-mips.org/patch/17323/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/perf_event_mipsxx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c index 9e6c74bf66c4..6668f67a61c3 100644 --- a/arch/mips/kernel/perf_event_mipsxx.c +++ b/arch/mips/kernel/perf_event_mipsxx.c @@ -618,8 +618,7 @@ static int mipspmu_event_init(struct perf_event *event) return -ENOENT; } - if ((unsigned int)event->cpu >= nr_cpumask_bits || - (event->cpu >= 0 && !cpu_online(event->cpu))) + if (event->cpu >= 0 && !cpu_online(event->cpu)) return -ENODEV; if (!atomic_inc_not_zero(&active_events)) { From bd6227a150fdb56e7bb734976ef6e53a2c1cb334 Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Thu, 14 Sep 2017 17:10:28 +0200 Subject: [PATCH 140/279] crypto: drbg - fix freeing of resources During the change to use aligned buffers, the deallocation code path was not updated correctly. The current code tries to free the aligned buffer pointer and not the original buffer pointer as it is supposed to. Thus, the code is updated to free the original buffer pointer and set the aligned buffer pointer that is used throughout the code to NULL. Fixes: 3cfc3b9721123 ("crypto: drbg - use aligned buffers") CC: CC: Herbert Xu Signed-off-by: Stephan Mueller Signed-off-by: Herbert Xu --- crypto/drbg.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crypto/drbg.c b/crypto/drbg.c index 633a88e93ab0..70018397e59a 100644 --- a/crypto/drbg.c +++ b/crypto/drbg.c @@ -1133,10 +1133,10 @@ static inline void drbg_dealloc_state(struct drbg_state *drbg) { if (!drbg) return; - kzfree(drbg->V); - drbg->Vbuf = NULL; - kzfree(drbg->C); - drbg->Cbuf = NULL; + kzfree(drbg->Vbuf); + drbg->V = NULL; + kzfree(drbg->Cbuf); + drbg->C = NULL; kzfree(drbg->scratchpadbuf); drbg->scratchpadbuf = NULL; drbg->reseed_ctr = 0; From 569f11c9f788959b640116b5bbd6d8a1f07326da Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 18 Sep 2017 14:42:00 -0500 Subject: [PATCH 141/279] crypto: x86/blowfish - Fix RBP usage Using RBP as a temporary register breaks frame pointer convention and breaks stack traces when unwinding from an interrupt in the crypto code. Use R12 instead of RBP. R12 can't be used as the RT0 register because of x86 instruction encoding limitations. So use R12 for CTX and RDI for CTX. This means that CTX is no longer an implicit function argument. Instead it needs to be explicitly copied from RDI. Reported-by: Eric Biggers Reported-by: Peter Zijlstra Tested-by: Eric Biggers Acked-by: Eric Biggers Signed-off-by: Josh Poimboeuf Signed-off-by: Herbert Xu --- arch/x86/crypto/blowfish-x86_64-asm_64.S | 48 +++++++++++++----------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/arch/x86/crypto/blowfish-x86_64-asm_64.S b/arch/x86/crypto/blowfish-x86_64-asm_64.S index 246c67006ed0..8c1fcb6bad21 100644 --- a/arch/x86/crypto/blowfish-x86_64-asm_64.S +++ b/arch/x86/crypto/blowfish-x86_64-asm_64.S @@ -33,7 +33,7 @@ #define s3 ((16 + 2 + (3 * 256)) * 4) /* register macros */ -#define CTX %rdi +#define CTX %r12 #define RIO %rsi #define RX0 %rax @@ -56,12 +56,12 @@ #define RX2bh %ch #define RX3bh %dh -#define RT0 %rbp +#define RT0 %rdi #define RT1 %rsi #define RT2 %r8 #define RT3 %r9 -#define RT0d %ebp +#define RT0d %edi #define RT1d %esi #define RT2d %r8d #define RT3d %r9d @@ -120,13 +120,14 @@ ENTRY(__blowfish_enc_blk) /* input: - * %rdi: ctx, CTX + * %rdi: ctx * %rsi: dst * %rdx: src * %rcx: bool, if true: xor output */ - movq %rbp, %r11; + movq %r12, %r11; + movq %rdi, CTX; movq %rsi, %r10; movq %rdx, RIO; @@ -142,7 +143,7 @@ ENTRY(__blowfish_enc_blk) round_enc(14); add_roundkey_enc(16); - movq %r11, %rbp; + movq %r11, %r12; movq %r10, RIO; test %cl, %cl; @@ -157,12 +158,13 @@ ENDPROC(__blowfish_enc_blk) ENTRY(blowfish_dec_blk) /* input: - * %rdi: ctx, CTX + * %rdi: ctx * %rsi: dst * %rdx: src */ - movq %rbp, %r11; + movq %r12, %r11; + movq %rdi, CTX; movq %rsi, %r10; movq %rdx, RIO; @@ -181,7 +183,7 @@ ENTRY(blowfish_dec_blk) movq %r10, RIO; write_block(); - movq %r11, %rbp; + movq %r11, %r12; ret; ENDPROC(blowfish_dec_blk) @@ -298,20 +300,21 @@ ENDPROC(blowfish_dec_blk) ENTRY(__blowfish_enc_blk_4way) /* input: - * %rdi: ctx, CTX + * %rdi: ctx * %rsi: dst * %rdx: src * %rcx: bool, if true: xor output */ - pushq %rbp; + pushq %r12; pushq %rbx; pushq %rcx; - preload_roundkey_enc(0); - + movq %rdi, CTX movq %rsi, %r11; movq %rdx, RIO; + preload_roundkey_enc(0); + read_block4(); round_enc4(0); @@ -324,39 +327,40 @@ ENTRY(__blowfish_enc_blk_4way) round_enc4(14); add_preloaded_roundkey4(); - popq %rbp; + popq %r12; movq %r11, RIO; - test %bpl, %bpl; + test %r12b, %r12b; jnz .L__enc_xor4; write_block4(); popq %rbx; - popq %rbp; + popq %r12; ret; .L__enc_xor4: xor_block4(); popq %rbx; - popq %rbp; + popq %r12; ret; ENDPROC(__blowfish_enc_blk_4way) ENTRY(blowfish_dec_blk_4way) /* input: - * %rdi: ctx, CTX + * %rdi: ctx * %rsi: dst * %rdx: src */ - pushq %rbp; + pushq %r12; pushq %rbx; - preload_roundkey_dec(17); - movq %rsi, %r11; + movq %rdi, CTX; + movq %rsi, %r11 movq %rdx, RIO; + preload_roundkey_dec(17); read_block4(); round_dec4(17); @@ -373,7 +377,7 @@ ENTRY(blowfish_dec_blk_4way) write_block4(); popq %rbx; - popq %rbp; + popq %r12; ret; ENDPROC(blowfish_dec_blk_4way) From b46c9d717645529417ca9045cfdbf59f84922573 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 18 Sep 2017 14:42:01 -0500 Subject: [PATCH 142/279] crypto: x86/camellia - Fix RBP usage Using RBP as a temporary register breaks frame pointer convention and breaks stack traces when unwinding from an interrupt in the crypto code. Use R12 instead of RBP. Both are callee-saved registers, so the substitution is straightforward. Reported-by: Eric Biggers Reported-by: Peter Zijlstra Tested-by: Eric Biggers Acked-by: Eric Biggers Signed-off-by: Josh Poimboeuf Signed-off-by: Herbert Xu --- arch/x86/crypto/camellia-x86_64-asm_64.S | 26 ++++++++++++------------ 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/x86/crypto/camellia-x86_64-asm_64.S b/arch/x86/crypto/camellia-x86_64-asm_64.S index 310319c601ed..95ba6956a7f6 100644 --- a/arch/x86/crypto/camellia-x86_64-asm_64.S +++ b/arch/x86/crypto/camellia-x86_64-asm_64.S @@ -75,17 +75,17 @@ #define RCD1bh %dh #define RT0 %rsi -#define RT1 %rbp +#define RT1 %r12 #define RT2 %r8 #define RT0d %esi -#define RT1d %ebp +#define RT1d %r12d #define RT2d %r8d #define RT2bl %r8b #define RXOR %r9 -#define RRBP %r10 +#define RR12 %r10 #define RDST %r11 #define RXORd %r9d @@ -197,7 +197,7 @@ ENTRY(__camellia_enc_blk) * %rdx: src * %rcx: bool xor */ - movq %rbp, RRBP; + movq %r12, RR12; movq %rcx, RXOR; movq %rsi, RDST; @@ -227,13 +227,13 @@ ENTRY(__camellia_enc_blk) enc_outunpack(mov, RT1); - movq RRBP, %rbp; + movq RR12, %r12; ret; .L__enc_xor: enc_outunpack(xor, RT1); - movq RRBP, %rbp; + movq RR12, %r12; ret; ENDPROC(__camellia_enc_blk) @@ -248,7 +248,7 @@ ENTRY(camellia_dec_blk) movl $24, RXORd; cmovel RXORd, RT2d; /* max */ - movq %rbp, RRBP; + movq %r12, RR12; movq %rsi, RDST; movq %rdx, RIO; @@ -271,7 +271,7 @@ ENTRY(camellia_dec_blk) dec_outunpack(); - movq RRBP, %rbp; + movq RR12, %r12; ret; ENDPROC(camellia_dec_blk) @@ -433,7 +433,7 @@ ENTRY(__camellia_enc_blk_2way) */ pushq %rbx; - movq %rbp, RRBP; + movq %r12, RR12; movq %rcx, RXOR; movq %rsi, RDST; movq %rdx, RIO; @@ -461,14 +461,14 @@ ENTRY(__camellia_enc_blk_2way) enc_outunpack2(mov, RT2); - movq RRBP, %rbp; + movq RR12, %r12; popq %rbx; ret; .L__enc2_xor: enc_outunpack2(xor, RT2); - movq RRBP, %rbp; + movq RR12, %r12; popq %rbx; ret; ENDPROC(__camellia_enc_blk_2way) @@ -485,7 +485,7 @@ ENTRY(camellia_dec_blk_2way) cmovel RXORd, RT2d; /* max */ movq %rbx, RXOR; - movq %rbp, RRBP; + movq %r12, RR12; movq %rsi, RDST; movq %rdx, RIO; @@ -508,7 +508,7 @@ ENTRY(camellia_dec_blk_2way) dec_outunpack2(); - movq RRBP, %rbp; + movq RR12, %r12; movq RXOR, %rbx; ret; ENDPROC(camellia_dec_blk_2way) From 4b15606664a2f8d7c4f0092fb0305fe1c7c65b7b Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 18 Sep 2017 14:42:02 -0500 Subject: [PATCH 143/279] crypto: x86/cast5 - Fix RBP usage Using RBP as a temporary register breaks frame pointer convention and breaks stack traces when unwinding from an interrupt in the crypto code. Use R15 instead of RBP. R15 can't be used as the RID1 register because of x86 instruction encoding limitations. So use R15 for CTX and RDI for CTX. This means that CTX is no longer an implicit function argument. Instead it needs to be explicitly copied from RDI. Reported-by: Eric Biggers Reported-by: Peter Zijlstra Tested-by: Eric Biggers Acked-by: Eric Biggers Signed-off-by: Josh Poimboeuf Signed-off-by: Herbert Xu --- arch/x86/crypto/cast5-avx-x86_64-asm_64.S | 47 +++++++++++++++-------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S index b4a8806234ea..86107c961bb4 100644 --- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S @@ -47,7 +47,7 @@ /********************************************************************** 16-way AVX cast5 **********************************************************************/ -#define CTX %rdi +#define CTX %r15 #define RL1 %xmm0 #define RR1 %xmm1 @@ -70,8 +70,8 @@ #define RTMP %xmm15 -#define RID1 %rbp -#define RID1d %ebp +#define RID1 %rdi +#define RID1d %edi #define RID2 %rsi #define RID2d %esi @@ -226,7 +226,7 @@ .align 16 __cast5_enc_blk16: /* input: - * %rdi: ctx, CTX + * %rdi: ctx * RL1: blocks 1 and 2 * RR1: blocks 3 and 4 * RL2: blocks 5 and 6 @@ -246,9 +246,11 @@ __cast5_enc_blk16: * RR4: encrypted blocks 15 and 16 */ - pushq %rbp; + pushq %r15; pushq %rbx; + movq %rdi, CTX; + vmovdqa .Lbswap_mask, RKM; vmovd .Lfirst_mask, R1ST; vmovd .L32_mask, R32; @@ -283,7 +285,7 @@ __cast5_enc_blk16: .L__skip_enc: popq %rbx; - popq %rbp; + popq %r15; vmovdqa .Lbswap_mask, RKM; @@ -298,7 +300,7 @@ ENDPROC(__cast5_enc_blk16) .align 16 __cast5_dec_blk16: /* input: - * %rdi: ctx, CTX + * %rdi: ctx * RL1: encrypted blocks 1 and 2 * RR1: encrypted blocks 3 and 4 * RL2: encrypted blocks 5 and 6 @@ -318,9 +320,11 @@ __cast5_dec_blk16: * RR4: decrypted blocks 15 and 16 */ - pushq %rbp; + pushq %r15; pushq %rbx; + movq %rdi, CTX; + vmovdqa .Lbswap_mask, RKM; vmovd .Lfirst_mask, R1ST; vmovd .L32_mask, R32; @@ -356,7 +360,7 @@ __cast5_dec_blk16: vmovdqa .Lbswap_mask, RKM; popq %rbx; - popq %rbp; + popq %r15; outunpack_blocks(RR1, RL1, RTMP, RX, RKM); outunpack_blocks(RR2, RL2, RTMP, RX, RKM); @@ -372,12 +376,14 @@ ENDPROC(__cast5_dec_blk16) ENTRY(cast5_ecb_enc_16way) /* input: - * %rdi: ctx, CTX + * %rdi: ctx * %rsi: dst * %rdx: src */ FRAME_BEGIN + pushq %r15; + movq %rdi, CTX; movq %rsi, %r11; vmovdqu (0*4*4)(%rdx), RL1; @@ -400,18 +406,22 @@ ENTRY(cast5_ecb_enc_16way) vmovdqu RR4, (6*4*4)(%r11); vmovdqu RL4, (7*4*4)(%r11); + popq %r15; FRAME_END ret; ENDPROC(cast5_ecb_enc_16way) ENTRY(cast5_ecb_dec_16way) /* input: - * %rdi: ctx, CTX + * %rdi: ctx * %rsi: dst * %rdx: src */ FRAME_BEGIN + pushq %r15; + + movq %rdi, CTX; movq %rsi, %r11; vmovdqu (0*4*4)(%rdx), RL1; @@ -434,20 +444,22 @@ ENTRY(cast5_ecb_dec_16way) vmovdqu RR4, (6*4*4)(%r11); vmovdqu RL4, (7*4*4)(%r11); + popq %r15; FRAME_END ret; ENDPROC(cast5_ecb_dec_16way) ENTRY(cast5_cbc_dec_16way) /* input: - * %rdi: ctx, CTX + * %rdi: ctx * %rsi: dst * %rdx: src */ FRAME_BEGIN - pushq %r12; + pushq %r15; + movq %rdi, CTX; movq %rsi, %r11; movq %rdx, %r12; @@ -483,23 +495,24 @@ ENTRY(cast5_cbc_dec_16way) vmovdqu RR4, (6*16)(%r11); vmovdqu RL4, (7*16)(%r11); + popq %r15; popq %r12; - FRAME_END ret; ENDPROC(cast5_cbc_dec_16way) ENTRY(cast5_ctr_16way) /* input: - * %rdi: ctx, CTX + * %rdi: ctx * %rsi: dst * %rdx: src * %rcx: iv (big endian, 64bit) */ FRAME_BEGIN - pushq %r12; + pushq %r15; + movq %rdi, CTX; movq %rsi, %r11; movq %rdx, %r12; @@ -558,8 +571,8 @@ ENTRY(cast5_ctr_16way) vmovdqu RR4, (6*16)(%r11); vmovdqu RL4, (7*16)(%r11); + popq %r15; popq %r12; - FRAME_END ret; ENDPROC(cast5_ctr_16way) From c66cc3be2951fad4d7d7f799baf57c8c5cc8d655 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 18 Sep 2017 14:42:03 -0500 Subject: [PATCH 144/279] crypto: x86/cast6 - Fix RBP usage Using RBP as a temporary register breaks frame pointer convention and breaks stack traces when unwinding from an interrupt in the crypto code. Use R15 instead of RBP. R15 can't be used as the RID1 register because of x86 instruction encoding limitations. So use R15 for CTX and RDI for CTX. This means that CTX is no longer an implicit function argument. Instead it needs to be explicitly copied from RDI. Reported-by: Eric Biggers Reported-by: Peter Zijlstra Tested-by: Eric Biggers Acked-by: Eric Biggers Signed-off-by: Josh Poimboeuf Signed-off-by: Herbert Xu --- arch/x86/crypto/cast6-avx-x86_64-asm_64.S | 50 +++++++++++++++-------- 1 file changed, 34 insertions(+), 16 deletions(-) diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S index 952d3156a933..7f30b6f0d72c 100644 --- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S @@ -47,7 +47,7 @@ /********************************************************************** 8-way AVX cast6 **********************************************************************/ -#define CTX %rdi +#define CTX %r15 #define RA1 %xmm0 #define RB1 %xmm1 @@ -70,8 +70,8 @@ #define RTMP %xmm15 -#define RID1 %rbp -#define RID1d %ebp +#define RID1 %rdi +#define RID1d %edi #define RID2 %rsi #define RID2d %esi @@ -264,15 +264,17 @@ .align 8 __cast6_enc_blk8: /* input: - * %rdi: ctx, CTX + * %rdi: ctx * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks * output: * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks */ - pushq %rbp; + pushq %r15; pushq %rbx; + movq %rdi, CTX; + vmovdqa .Lbswap_mask, RKM; vmovd .Lfirst_mask, R1ST; vmovd .L32_mask, R32; @@ -297,7 +299,7 @@ __cast6_enc_blk8: QBAR(11); popq %rbx; - popq %rbp; + popq %r15; vmovdqa .Lbswap_mask, RKM; @@ -310,15 +312,17 @@ ENDPROC(__cast6_enc_blk8) .align 8 __cast6_dec_blk8: /* input: - * %rdi: ctx, CTX + * %rdi: ctx * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks * output: * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: decrypted blocks */ - pushq %rbp; + pushq %r15; pushq %rbx; + movq %rdi, CTX; + vmovdqa .Lbswap_mask, RKM; vmovd .Lfirst_mask, R1ST; vmovd .L32_mask, R32; @@ -343,7 +347,7 @@ __cast6_dec_blk8: QBAR(0); popq %rbx; - popq %rbp; + popq %r15; vmovdqa .Lbswap_mask, RKM; outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM); @@ -354,12 +358,14 @@ ENDPROC(__cast6_dec_blk8) ENTRY(cast6_ecb_enc_8way) /* input: - * %rdi: ctx, CTX + * %rdi: ctx * %rsi: dst * %rdx: src */ FRAME_BEGIN + pushq %r15; + movq %rdi, CTX; movq %rsi, %r11; load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); @@ -368,18 +374,21 @@ ENTRY(cast6_ecb_enc_8way) store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + popq %r15; FRAME_END ret; ENDPROC(cast6_ecb_enc_8way) ENTRY(cast6_ecb_dec_8way) /* input: - * %rdi: ctx, CTX + * %rdi: ctx * %rsi: dst * %rdx: src */ FRAME_BEGIN + pushq %r15; + movq %rdi, CTX; movq %rsi, %r11; load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); @@ -388,20 +397,22 @@ ENTRY(cast6_ecb_dec_8way) store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + popq %r15; FRAME_END ret; ENDPROC(cast6_ecb_dec_8way) ENTRY(cast6_cbc_dec_8way) /* input: - * %rdi: ctx, CTX + * %rdi: ctx * %rsi: dst * %rdx: src */ FRAME_BEGIN - pushq %r12; + pushq %r15; + movq %rdi, CTX; movq %rsi, %r11; movq %rdx, %r12; @@ -411,8 +422,8 @@ ENTRY(cast6_cbc_dec_8way) store_cbc_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + popq %r15; popq %r12; - FRAME_END ret; ENDPROC(cast6_cbc_dec_8way) @@ -425,9 +436,10 @@ ENTRY(cast6_ctr_8way) * %rcx: iv (little endian, 128bit) */ FRAME_BEGIN - pushq %r12; + pushq %r15 + movq %rdi, CTX; movq %rsi, %r11; movq %rdx, %r12; @@ -438,8 +450,8 @@ ENTRY(cast6_ctr_8way) store_ctr_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + popq %r15; popq %r12; - FRAME_END ret; ENDPROC(cast6_ctr_8way) @@ -452,7 +464,9 @@ ENTRY(cast6_xts_enc_8way) * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) */ FRAME_BEGIN + pushq %r15; + movq %rdi, CTX movq %rsi, %r11; /* regs <= src, dst <= IVs, regs <= regs xor IVs */ @@ -464,6 +478,7 @@ ENTRY(cast6_xts_enc_8way) /* dst <= regs xor IVs(in dst) */ store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + popq %r15; FRAME_END ret; ENDPROC(cast6_xts_enc_8way) @@ -476,7 +491,9 @@ ENTRY(cast6_xts_dec_8way) * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) */ FRAME_BEGIN + pushq %r15; + movq %rdi, CTX movq %rsi, %r11; /* regs <= src, dst <= IVs, regs <= regs xor IVs */ @@ -488,6 +505,7 @@ ENTRY(cast6_xts_dec_8way) /* dst <= regs xor IVs(in dst) */ store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + popq %r15; FRAME_END ret; ENDPROC(cast6_xts_dec_8way) From 3ed7b4d67c6745300c9b5c6baa55da1161b57f60 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 18 Sep 2017 14:42:04 -0500 Subject: [PATCH 145/279] crypto: x86/des3_ede - Fix RBP usage Using RBP as a temporary register breaks frame pointer convention and breaks stack traces when unwinding from an interrupt in the crypto code. Use RSI instead of RBP for RT1. Since RSI is also used as a the 'dst' function argument, it needs to be saved on the stack until the argument is needed. Reported-by: Eric Biggers Reported-by: Peter Zijlstra Tested-by: Eric Biggers Acked-by: Eric Biggers Signed-off-by: Josh Poimboeuf Signed-off-by: Herbert Xu --- arch/x86/crypto/des3_ede-asm_64.S | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/x86/crypto/des3_ede-asm_64.S b/arch/x86/crypto/des3_ede-asm_64.S index f3e91647ca27..8e49ce117494 100644 --- a/arch/x86/crypto/des3_ede-asm_64.S +++ b/arch/x86/crypto/des3_ede-asm_64.S @@ -64,12 +64,12 @@ #define RW2bh %ch #define RT0 %r15 -#define RT1 %rbp +#define RT1 %rsi #define RT2 %r14 #define RT3 %rdx #define RT0d %r15d -#define RT1d %ebp +#define RT1d %esi #define RT2d %r14d #define RT3d %edx @@ -177,13 +177,14 @@ ENTRY(des3_ede_x86_64_crypt_blk) * %rsi: dst * %rdx: src */ - pushq %rbp; pushq %rbx; pushq %r12; pushq %r13; pushq %r14; pushq %r15; + pushq %rsi; /* dst */ + read_block(%rdx, RL0, RR0); initial_permutation(RL0, RR0); @@ -241,6 +242,8 @@ ENTRY(des3_ede_x86_64_crypt_blk) round1(32+15, RL0, RR0, dummy2); final_permutation(RR0, RL0); + + popq %rsi /* dst */ write_block(%rsi, RR0, RL0); popq %r15; @@ -248,7 +251,6 @@ ENTRY(des3_ede_x86_64_crypt_blk) popq %r13; popq %r12; popq %rbx; - popq %rbp; ret; ENDPROC(des3_ede_x86_64_crypt_blk) @@ -432,13 +434,14 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way) * %rdx: src (3 blocks) */ - pushq %rbp; pushq %rbx; pushq %r12; pushq %r13; pushq %r14; pushq %r15; + pushq %rsi /* dst */ + /* load input */ movl 0 * 4(%rdx), RL0d; movl 1 * 4(%rdx), RR0d; @@ -520,6 +523,7 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way) bswapl RR2d; bswapl RL2d; + popq %rsi /* dst */ movl RR0d, 0 * 4(%rsi); movl RL0d, 1 * 4(%rsi); movl RR1d, 2 * 4(%rsi); @@ -532,7 +536,6 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way) popq %r13; popq %r12; popq %rbx; - popq %rbp; ret; ENDPROC(des3_ede_x86_64_crypt_blk_3way) From d7b1722c72aa915283ada27709c6feeb392f6038 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 18 Sep 2017 14:42:05 -0500 Subject: [PATCH 146/279] crypto: x86/sha1-avx2 - Fix RBP usage Using RBP as a temporary register breaks frame pointer convention and breaks stack traces when unwinding from an interrupt in the crypto code. Use R11 instead of RBP. Since R11 isn't a callee-saved register, it doesn't need to be saved and restored on the stack. Reported-by: Eric Biggers Reported-by: Peter Zijlstra Tested-by: Eric Biggers Acked-by: Eric Biggers Signed-off-by: Josh Poimboeuf Signed-off-by: Herbert Xu --- arch/x86/crypto/sha1_avx2_x86_64_asm.S | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S index 1eab79c9ac48..9f712a7dfd79 100644 --- a/arch/x86/crypto/sha1_avx2_x86_64_asm.S +++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S @@ -89,7 +89,7 @@ #define REG_RE %rdx #define REG_RTA %r12 #define REG_RTB %rbx -#define REG_T1 %ebp +#define REG_T1 %r11d #define xmm_mov vmovups #define avx2_zeroupper vzeroupper #define RND_F1 1 @@ -637,7 +637,6 @@ _loop3: ENTRY(\name) push %rbx - push %rbp push %r12 push %r13 push %r14 @@ -673,7 +672,6 @@ _loop3: pop %r14 pop %r13 pop %r12 - pop %rbp pop %rbx ret From 6488bce756861b94810e54f83416d5e74c0f18bf Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 18 Sep 2017 14:42:06 -0500 Subject: [PATCH 147/279] crypto: x86/sha1-ssse3 - Fix RBP usage Using RBP as a temporary register breaks frame pointer convention and breaks stack traces when unwinding from an interrupt in the crypto code. Swap the usages of R12 and RBP. Use R12 for the REG_D register, and use RBP to store the pre-aligned stack pointer. Reported-by: Eric Biggers Reported-by: Peter Zijlstra Tested-by: Eric Biggers Acked-by: Eric Biggers Signed-off-by: Josh Poimboeuf Signed-off-by: Herbert Xu --- arch/x86/crypto/sha1_ssse3_asm.S | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S index a4109506a5e8..6204bd53528c 100644 --- a/arch/x86/crypto/sha1_ssse3_asm.S +++ b/arch/x86/crypto/sha1_ssse3_asm.S @@ -37,7 +37,7 @@ #define REG_A %ecx #define REG_B %esi #define REG_C %edi -#define REG_D %ebp +#define REG_D %r12d #define REG_E %edx #define REG_T1 %eax @@ -74,10 +74,10 @@ ENTRY(\name) push %rbx - push %rbp push %r12 + push %rbp + mov %rsp, %rbp - mov %rsp, %r12 sub $64, %rsp # allocate workspace and $~15, %rsp # align stack @@ -99,10 +99,9 @@ xor %rax, %rax rep stosq - mov %r12, %rsp # deallocate workspace - - pop %r12 + mov %rbp, %rsp # deallocate workspace pop %rbp + pop %r12 pop %rbx ret From 673ac6fbc74f835e2125df9ee39e8a2a423832e2 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 18 Sep 2017 14:42:07 -0500 Subject: [PATCH 148/279] crypto: x86/sha256-avx - Fix RBP usage Using RBP as a temporary register breaks frame pointer convention and breaks stack traces when unwinding from an interrupt in the crypto code. Swap the usages of R12 and RBP. Use R12 for the TBL register, and use RBP to store the pre-aligned stack pointer. Reported-by: Eric Biggers Reported-by: Peter Zijlstra Tested-by: Eric Biggers Acked-by: Eric Biggers Signed-off-by: Josh Poimboeuf Signed-off-by: Herbert Xu --- arch/x86/crypto/sha256-avx-asm.S | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S index e08888a1a5f2..001bbcf93c79 100644 --- a/arch/x86/crypto/sha256-avx-asm.S +++ b/arch/x86/crypto/sha256-avx-asm.S @@ -103,7 +103,7 @@ SRND = %rsi # clobbers INP c = %ecx d = %r8d e = %edx -TBL = %rbp +TBL = %r12 a = %eax b = %ebx @@ -350,13 +350,13 @@ a = TMP_ ENTRY(sha256_transform_avx) .align 32 pushq %rbx - pushq %rbp + pushq %r12 pushq %r13 pushq %r14 pushq %r15 - pushq %r12 + pushq %rbp + movq %rsp, %rbp - mov %rsp, %r12 subq $STACK_SIZE, %rsp # allocate stack space and $~15, %rsp # align stack pointer @@ -452,13 +452,12 @@ loop2: done_hash: - mov %r12, %rsp - - popq %r12 + mov %rbp, %rsp + popq %rbp popq %r15 popq %r14 popq %r13 - popq %rbp + popq %r12 popq %rbx ret ENDPROC(sha256_transform_avx) From d3dfbfe2e6e7ecd620531d5201314ad14c4ed5b3 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 18 Sep 2017 14:42:08 -0500 Subject: [PATCH 149/279] crypto: x86/sha256-avx2 - Fix RBP usage Using RBP as a temporary register breaks frame pointer convention and breaks stack traces when unwinding from an interrupt in the crypto code. There's no need to use RBP as a temporary register for the TBL value, because it always stores the same value: the address of the K256 table. Instead just reference the address of K256 directly. Reported-by: Eric Biggers Reported-by: Peter Zijlstra Tested-by: Eric Biggers Acked-by: Eric Biggers Signed-off-by: Josh Poimboeuf Signed-off-by: Herbert Xu --- arch/x86/crypto/sha256-avx2-asm.S | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S index 89c8f09787d2..1420db15dcdd 100644 --- a/arch/x86/crypto/sha256-avx2-asm.S +++ b/arch/x86/crypto/sha256-avx2-asm.S @@ -98,8 +98,6 @@ d = %r8d e = %edx # clobbers NUM_BLKS y3 = %esi # clobbers INP - -TBL = %rbp SRND = CTX # SRND is same register as CTX a = %eax @@ -531,7 +529,6 @@ STACK_SIZE = _RSP + _RSP_SIZE ENTRY(sha256_transform_rorx) .align 32 pushq %rbx - pushq %rbp pushq %r12 pushq %r13 pushq %r14 @@ -568,8 +565,6 @@ ENTRY(sha256_transform_rorx) mov CTX, _CTX(%rsp) loop0: - lea K256(%rip), TBL - ## Load first 16 dwords from two blocks VMOVDQ 0*32(INP),XTMP0 VMOVDQ 1*32(INP),XTMP1 @@ -597,19 +592,19 @@ last_block_enter: .align 16 loop1: - vpaddd 0*32(TBL, SRND), X0, XFER + vpaddd K256+0*32(SRND), X0, XFER vmovdqa XFER, 0*32+_XFER(%rsp, SRND) FOUR_ROUNDS_AND_SCHED _XFER + 0*32 - vpaddd 1*32(TBL, SRND), X0, XFER + vpaddd K256+1*32(SRND), X0, XFER vmovdqa XFER, 1*32+_XFER(%rsp, SRND) FOUR_ROUNDS_AND_SCHED _XFER + 1*32 - vpaddd 2*32(TBL, SRND), X0, XFER + vpaddd K256+2*32(SRND), X0, XFER vmovdqa XFER, 2*32+_XFER(%rsp, SRND) FOUR_ROUNDS_AND_SCHED _XFER + 2*32 - vpaddd 3*32(TBL, SRND), X0, XFER + vpaddd K256+3*32(SRND), X0, XFER vmovdqa XFER, 3*32+_XFER(%rsp, SRND) FOUR_ROUNDS_AND_SCHED _XFER + 3*32 @@ -619,10 +614,11 @@ loop1: loop2: ## Do last 16 rounds with no scheduling - vpaddd 0*32(TBL, SRND), X0, XFER + vpaddd K256+0*32(SRND), X0, XFER vmovdqa XFER, 0*32+_XFER(%rsp, SRND) DO_4ROUNDS _XFER + 0*32 - vpaddd 1*32(TBL, SRND), X1, XFER + + vpaddd K256+1*32(SRND), X1, XFER vmovdqa XFER, 1*32+_XFER(%rsp, SRND) DO_4ROUNDS _XFER + 1*32 add $2*32, SRND @@ -676,9 +672,6 @@ loop3: ja done_hash do_last_block: - #### do last block - lea K256(%rip), TBL - VMOVDQ 0*16(INP),XWORD0 VMOVDQ 1*16(INP),XWORD1 VMOVDQ 2*16(INP),XWORD2 @@ -718,7 +711,6 @@ done_hash: popq %r14 popq %r13 popq %r12 - popq %rbp popq %rbx ret ENDPROC(sha256_transform_rorx) From 539012dcbdd1ff028268764385ed1f6d600812a7 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 18 Sep 2017 14:42:09 -0500 Subject: [PATCH 150/279] crypto: x86/sha256-ssse3 - Fix RBP usage Using RBP as a temporary register breaks frame pointer convention and breaks stack traces when unwinding from an interrupt in the crypto code. Swap the usages of R12 and RBP. Use R12 for the TBL register, and use RBP to store the pre-aligned stack pointer. Reported-by: Eric Biggers Reported-by: Peter Zijlstra Tested-by: Eric Biggers Acked-by: Eric Biggers Signed-off-by: Josh Poimboeuf Signed-off-by: Herbert Xu --- arch/x86/crypto/sha256-ssse3-asm.S | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S index 39b83c93e7fd..c6c05ed2c16a 100644 --- a/arch/x86/crypto/sha256-ssse3-asm.S +++ b/arch/x86/crypto/sha256-ssse3-asm.S @@ -95,7 +95,7 @@ SRND = %rsi # clobbers INP c = %ecx d = %r8d e = %edx -TBL = %rbp +TBL = %r12 a = %eax b = %ebx @@ -356,13 +356,13 @@ a = TMP_ ENTRY(sha256_transform_ssse3) .align 32 pushq %rbx - pushq %rbp + pushq %r12 pushq %r13 pushq %r14 pushq %r15 - pushq %r12 + pushq %rbp + mov %rsp, %rbp - mov %rsp, %r12 subq $STACK_SIZE, %rsp and $~15, %rsp @@ -462,13 +462,12 @@ loop2: done_hash: - mov %r12, %rsp - - popq %r12 + mov %rbp, %rsp + popq %rbp popq %r15 popq %r14 popq %r13 - popq %rbp + popq %r12 popq %rbx ret From ca04c823763e5b82c237cabe0c17f547ecdc6271 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 18 Sep 2017 14:42:10 -0500 Subject: [PATCH 151/279] crypto: sha512-avx2 - Fix RBP usage Using RBP as a temporary register breaks frame pointer convention and breaks stack traces when unwinding from an interrupt in the crypto code. Mix things up a little bit to get rid of the RBP usage, without hurting performance too much. Use RDI instead of RBP for the TBL pointer. That will clobber CTX, so spill CTX onto the stack and use R12 to read it in the outer loop. R12 is used as a non-persistent temporary variable elsewhere, so it's safe to use. Also remove the unused y4 variable. Reported-by: Eric Biggers Reported-by: Peter Zijlstra Tested-by: Eric Biggers Acked-by: Eric Biggers Signed-off-by: Josh Poimboeuf Signed-off-by: Herbert Xu --- arch/x86/crypto/sha512-avx2-asm.S | 75 ++++++++++++++++--------------- 1 file changed, 39 insertions(+), 36 deletions(-) diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S index 7f5f6c6ec72e..b16d56005162 100644 --- a/arch/x86/crypto/sha512-avx2-asm.S +++ b/arch/x86/crypto/sha512-avx2-asm.S @@ -69,8 +69,9 @@ XFER = YTMP0 BYTE_FLIP_MASK = %ymm9 -# 1st arg -CTX = %rdi +# 1st arg is %rdi, which is saved to the stack and accessed later via %r12 +CTX1 = %rdi +CTX2 = %r12 # 2nd arg INP = %rsi # 3rd arg @@ -81,7 +82,7 @@ d = %r8 e = %rdx y3 = %rsi -TBL = %rbp +TBL = %rdi # clobbers CTX1 a = %rax b = %rbx @@ -91,26 +92,26 @@ g = %r10 h = %r11 old_h = %r11 -T1 = %r12 +T1 = %r12 # clobbers CTX2 y0 = %r13 y1 = %r14 y2 = %r15 -y4 = %r12 - # Local variables (stack frame) XFER_SIZE = 4*8 SRND_SIZE = 1*8 INP_SIZE = 1*8 INPEND_SIZE = 1*8 +CTX_SIZE = 1*8 RSPSAVE_SIZE = 1*8 -GPRSAVE_SIZE = 6*8 +GPRSAVE_SIZE = 5*8 frame_XFER = 0 frame_SRND = frame_XFER + XFER_SIZE frame_INP = frame_SRND + SRND_SIZE frame_INPEND = frame_INP + INP_SIZE -frame_RSPSAVE = frame_INPEND + INPEND_SIZE +frame_CTX = frame_INPEND + INPEND_SIZE +frame_RSPSAVE = frame_CTX + CTX_SIZE frame_GPRSAVE = frame_RSPSAVE + RSPSAVE_SIZE frame_size = frame_GPRSAVE + GPRSAVE_SIZE @@ -576,12 +577,11 @@ ENTRY(sha512_transform_rorx) mov %rax, frame_RSPSAVE(%rsp) # Save GPRs - mov %rbp, frame_GPRSAVE(%rsp) - mov %rbx, 8*1+frame_GPRSAVE(%rsp) - mov %r12, 8*2+frame_GPRSAVE(%rsp) - mov %r13, 8*3+frame_GPRSAVE(%rsp) - mov %r14, 8*4+frame_GPRSAVE(%rsp) - mov %r15, 8*5+frame_GPRSAVE(%rsp) + mov %rbx, 8*0+frame_GPRSAVE(%rsp) + mov %r12, 8*1+frame_GPRSAVE(%rsp) + mov %r13, 8*2+frame_GPRSAVE(%rsp) + mov %r14, 8*3+frame_GPRSAVE(%rsp) + mov %r15, 8*4+frame_GPRSAVE(%rsp) shl $7, NUM_BLKS # convert to bytes jz done_hash @@ -589,14 +589,17 @@ ENTRY(sha512_transform_rorx) mov NUM_BLKS, frame_INPEND(%rsp) ## load initial digest - mov 8*0(CTX),a - mov 8*1(CTX),b - mov 8*2(CTX),c - mov 8*3(CTX),d - mov 8*4(CTX),e - mov 8*5(CTX),f - mov 8*6(CTX),g - mov 8*7(CTX),h + mov 8*0(CTX1), a + mov 8*1(CTX1), b + mov 8*2(CTX1), c + mov 8*3(CTX1), d + mov 8*4(CTX1), e + mov 8*5(CTX1), f + mov 8*6(CTX1), g + mov 8*7(CTX1), h + + # save %rdi (CTX) before it gets clobbered + mov %rdi, frame_CTX(%rsp) vmovdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), BYTE_FLIP_MASK @@ -652,14 +655,15 @@ loop2: subq $1, frame_SRND(%rsp) jne loop2 - addm 8*0(CTX),a - addm 8*1(CTX),b - addm 8*2(CTX),c - addm 8*3(CTX),d - addm 8*4(CTX),e - addm 8*5(CTX),f - addm 8*6(CTX),g - addm 8*7(CTX),h + mov frame_CTX(%rsp), CTX2 + addm 8*0(CTX2), a + addm 8*1(CTX2), b + addm 8*2(CTX2), c + addm 8*3(CTX2), d + addm 8*4(CTX2), e + addm 8*5(CTX2), f + addm 8*6(CTX2), g + addm 8*7(CTX2), h mov frame_INP(%rsp), INP add $128, INP @@ -669,12 +673,11 @@ loop2: done_hash: # Restore GPRs - mov frame_GPRSAVE(%rsp) ,%rbp - mov 8*1+frame_GPRSAVE(%rsp) ,%rbx - mov 8*2+frame_GPRSAVE(%rsp) ,%r12 - mov 8*3+frame_GPRSAVE(%rsp) ,%r13 - mov 8*4+frame_GPRSAVE(%rsp) ,%r14 - mov 8*5+frame_GPRSAVE(%rsp) ,%r15 + mov 8*0+frame_GPRSAVE(%rsp), %rbx + mov 8*1+frame_GPRSAVE(%rsp), %r12 + mov 8*2+frame_GPRSAVE(%rsp), %r13 + mov 8*3+frame_GPRSAVE(%rsp), %r14 + mov 8*4+frame_GPRSAVE(%rsp), %r15 # Restore Stack Pointer mov frame_RSPSAVE(%rsp), %rsp From 8f182f845d0fa26ecc18d3bdcc3d1077a0ea3a31 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 18 Sep 2017 14:42:11 -0500 Subject: [PATCH 152/279] crypto: x86/twofish - Fix RBP usage Using RBP as a temporary register breaks frame pointer convention and breaks stack traces when unwinding from an interrupt in the crypto code. Use R13 instead of RBP. Both are callee-saved registers, so the substitution is straightforward. Reported-by: Eric Biggers Reported-by: Peter Zijlstra Tested-by: Eric Biggers Acked-by: Eric Biggers Signed-off-by: Josh Poimboeuf Signed-off-by: Herbert Xu --- arch/x86/crypto/twofish-avx-x86_64-asm_64.S | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S index b3f49d286348..73b471da3622 100644 --- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S @@ -76,8 +76,8 @@ #define RT %xmm14 #define RR %xmm15 -#define RID1 %rbp -#define RID1d %ebp +#define RID1 %r13 +#define RID1d %r13d #define RID2 %rsi #define RID2d %esi @@ -259,7 +259,7 @@ __twofish_enc_blk8: vmovdqu w(CTX), RK1; - pushq %rbp; + pushq %r13; pushq %rbx; pushq %rcx; @@ -282,7 +282,7 @@ __twofish_enc_blk8: popq %rcx; popq %rbx; - popq %rbp; + popq %r13; outunpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2); outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2); @@ -301,7 +301,7 @@ __twofish_dec_blk8: vmovdqu (w+4*4)(CTX), RK1; - pushq %rbp; + pushq %r13; pushq %rbx; inpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2); @@ -322,7 +322,7 @@ __twofish_dec_blk8: vmovdqu (w)(CTX), RK1; popq %rbx; - popq %rbp; + popq %r13; outunpack_blocks(RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2); outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2); From afd62fa26343be6445479e75de9f07092a061459 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Wed, 13 Sep 2017 12:44:51 +0200 Subject: [PATCH 153/279] crypto: talitos - fix sha224 Kernel crypto tests report the following error at startup [ 2.752626] alg: hash: Test 4 failed for sha224-talitos [ 2.757907] 00000000: 30 e2 86 e2 e7 8a dd 0d d7 eb 9f d5 83 fe f1 b0 00000010: 2d 5a 6c a5 f9 55 ea fd 0e 72 05 22 This patch fixes it Cc: Signed-off-by: Christophe Leroy Signed-off-by: Herbert Xu --- drivers/crypto/talitos.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 79791c690858..6596761bc0c8 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -1756,9 +1756,9 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, req_ctx->swinit = 0; } else { desc->ptr[1] = zero_entry; - /* Indicate next op is not the first. */ - req_ctx->first = 0; } + /* Indicate next op is not the first. */ + req_ctx->first = 0; /* HMAC key */ if (ctx->keylen) From 886a27c0fc8a34633aadb0986dba11d8c150ae2e Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Wed, 13 Sep 2017 12:44:57 +0200 Subject: [PATCH 154/279] crypto: talitos - fix hashing md5sum on some files gives wrong result Exemple: With the md5sum from libkcapi: c15115c05bad51113f81bdaee735dd09 test With the original md5sum: bbdf41d80ba7e8b2b7be3a0772be76cb test This patch fixes this issue Cc: Signed-off-by: Christophe Leroy Signed-off-by: Herbert Xu --- drivers/crypto/talitos.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 6596761bc0c8..8a48fc4f3642 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -1769,7 +1769,7 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, sg_count = edesc->src_nents ?: 1; if (is_sec1 && sg_count > 1) - sg_copy_to_buffer(areq->src, sg_count, edesc->buf, length); + sg_copy_to_buffer(req_ctx->psrc, sg_count, edesc->buf, length); else sg_count = dma_map_sg(dev, req_ctx->psrc, sg_count, DMA_TO_DEVICE); From 56136631573baa537a15e0012055ffe8cfec1a33 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Tue, 12 Sep 2017 11:03:39 +0200 Subject: [PATCH 155/279] crypto: talitos - Don't provide setkey for non hmac hashing algs. Today, md5sum fails with error -ENOKEY because a setkey function is set for non hmac hashing algs, see strace output below: mmap(NULL, 378880, PROT_READ, MAP_SHARED, 6, 0) = 0x77f50000 accept(3, 0, NULL) = 7 vmsplice(5, [{"bin/\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 378880}], 1, SPLICE_F_MORE|SPLICE_F_GIFT) = 262144 splice(4, NULL, 7, NULL, 262144, SPLICE_F_MORE) = -1 ENOKEY (Required key not available) write(2, "Generation of hash for file kcap"..., 50) = 50 munmap(0x77f50000, 378880) = 0 This patch ensures that setkey() function is set only for hmac hashing. Cc: Signed-off-by: Christophe Leroy Signed-off-by: Herbert Xu --- drivers/crypto/talitos.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 8a48fc4f3642..dff88838dce7 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -3057,7 +3057,8 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev, t_alg->algt.alg.hash.final = ahash_final; t_alg->algt.alg.hash.finup = ahash_finup; t_alg->algt.alg.hash.digest = ahash_digest; - t_alg->algt.alg.hash.setkey = ahash_setkey; + if (!strncmp(alg->cra_name, "hmac", 4)) + t_alg->algt.alg.hash.setkey = ahash_setkey; t_alg->algt.alg.hash.import = ahash_import; t_alg->algt.alg.hash.export = ahash_export; From 3e1166b94e661ed51af1fe1fe5f74bd83450b50f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 12 Sep 2017 12:12:16 +0200 Subject: [PATCH 156/279] crypto: inside-secure - fix gcc-4.9 warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All older compiler versions up to gcc-4.9 produce these harmless warnings: drivers/crypto/inside-secure/safexcel_cipher.c:389:9: warning: missing braces around initializer [-Wmissing-braces] drivers/crypto/inside-secure/safexcel_cipher.c:389:9: warning: (near initialization for ‘result.completion’) [-Wmissing-braces] drivers/crypto/inside-secure/safexcel_hash.c:422:9: warning: missing braces around initializer [-Wmissing-braces] drivers/crypto/inside-secure/safexcel_hash.c:422:9: warning: (near initialization for ‘result.completion’) [-Wmissing-braces] This changes the syntax to something that works on all versions without warnings. Fixes: 1b44c5a60c13 ("crypto: inside-secure - add SafeXcel EIP197 crypto engine driver") Signed-off-by: Arnd Bergmann Acked-by: Antoine Tenart Signed-off-by: Herbert Xu --- drivers/crypto/inside-secure/safexcel_cipher.c | 2 +- drivers/crypto/inside-secure/safexcel_hash.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/inside-secure/safexcel_cipher.c b/drivers/crypto/inside-secure/safexcel_cipher.c index d2207ac5ba19..5438552bc6d7 100644 --- a/drivers/crypto/inside-secure/safexcel_cipher.c +++ b/drivers/crypto/inside-secure/safexcel_cipher.c @@ -386,7 +386,7 @@ static int safexcel_cipher_exit_inv(struct crypto_tfm *tfm) struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm); struct safexcel_crypto_priv *priv = ctx->priv; struct skcipher_request req; - struct safexcel_inv_result result = { 0 }; + struct safexcel_inv_result result = {}; int ring = ctx->base.ring; memset(&req, 0, sizeof(struct skcipher_request)); diff --git a/drivers/crypto/inside-secure/safexcel_hash.c b/drivers/crypto/inside-secure/safexcel_hash.c index 3f819399cd95..3980f946874f 100644 --- a/drivers/crypto/inside-secure/safexcel_hash.c +++ b/drivers/crypto/inside-secure/safexcel_hash.c @@ -419,7 +419,7 @@ static int safexcel_ahash_exit_inv(struct crypto_tfm *tfm) struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(tfm); struct safexcel_crypto_priv *priv = ctx->priv; struct ahash_request req; - struct safexcel_inv_result result = { 0 }; + struct safexcel_inv_result result = {}; int ring = ctx->base.ring; memset(&req, 0, sizeof(struct ahash_request)); From c056d910f08029662080a01b4ce2110e2c9a27b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Fri, 1 Sep 2017 17:12:59 +0300 Subject: [PATCH 157/279] crypto: caam - fix LS1021A support on ARMv7 multiplatform kernel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When built using multi_v7_defconfig, driver does not work on LS1021A: [...] caam 1700000.crypto: can't identify CAAM ipg clk: -2 caam: probe of 1700000.crypto failed with error -2 [...] It turns out we have to detect at runtime whether driver is running on an i.MX platform or not. Cc: Fixes: 6c3af9559352 ("crypto: caam - add support for LS1021A") Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/Kconfig | 5 +--- drivers/crypto/caam/ctrl.c | 19 ++++++------ drivers/crypto/caam/regs.h | 59 ++++++++++++++++++------------------- 3 files changed, 39 insertions(+), 44 deletions(-) diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig index e36aeacd7635..1eb852765469 100644 --- a/drivers/crypto/caam/Kconfig +++ b/drivers/crypto/caam/Kconfig @@ -1,6 +1,7 @@ config CRYPTO_DEV_FSL_CAAM tristate "Freescale CAAM-Multicore driver backend" depends on FSL_SOC || ARCH_MXC || ARCH_LAYERSCAPE + select SOC_BUS help Enables the driver module for Freescale's Cryptographic Accelerator and Assurance Module (CAAM), also known as the SEC version 4 (SEC4). @@ -141,10 +142,6 @@ config CRYPTO_DEV_FSL_CAAM_RNG_API To compile this as a module, choose M here: the module will be called caamrng. -config CRYPTO_DEV_FSL_CAAM_IMX - def_bool SOC_IMX6 || SOC_IMX7D - depends on CRYPTO_DEV_FSL_CAAM - config CRYPTO_DEV_FSL_CAAM_DEBUG bool "Enable debug output in CAAM driver" depends on CRYPTO_DEV_FSL_CAAM diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index dacb53fb690e..027e121c6f70 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -7,6 +7,7 @@ #include #include #include +#include #include "compat.h" #include "regs.h" @@ -19,6 +20,8 @@ bool caam_little_end; EXPORT_SYMBOL(caam_little_end); bool caam_dpaa2; EXPORT_SYMBOL(caam_dpaa2); +bool caam_imx; +EXPORT_SYMBOL(caam_imx); #ifdef CONFIG_CAAM_QI #include "qi.h" @@ -28,19 +31,11 @@ EXPORT_SYMBOL(caam_dpaa2); * i.MX targets tend to have clock control subsystems that can * enable/disable clocking to our device. */ -#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX static inline struct clk *caam_drv_identify_clk(struct device *dev, char *clk_name) { - return devm_clk_get(dev, clk_name); + return caam_imx ? devm_clk_get(dev, clk_name) : NULL; } -#else -static inline struct clk *caam_drv_identify_clk(struct device *dev, - char *clk_name) -{ - return NULL; -} -#endif /* * Descriptor to instantiate RNG State Handle 0 in normal mode and @@ -430,6 +425,10 @@ static int caam_probe(struct platform_device *pdev) { int ret, ring, gen_sk, ent_delay = RTSDCTL_ENT_DLY_MIN; u64 caam_id; + static const struct soc_device_attribute imx_soc[] = { + {.family = "Freescale i.MX"}, + {}, + }; struct device *dev; struct device_node *nprop, *np; struct caam_ctrl __iomem *ctrl; @@ -451,6 +450,8 @@ static int caam_probe(struct platform_device *pdev) dev_set_drvdata(dev, ctrlpriv); nprop = pdev->dev.of_node; + caam_imx = (bool)soc_device_match(imx_soc); + /* Enable clocking */ clk = caam_drv_identify_clk(&pdev->dev, "ipg"); if (IS_ERR(clk)) { diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h index 2b5efff9ec3c..17cfd23a38fa 100644 --- a/drivers/crypto/caam/regs.h +++ b/drivers/crypto/caam/regs.h @@ -67,6 +67,7 @@ */ extern bool caam_little_end; +extern bool caam_imx; #define caam_to_cpu(len) \ static inline u##len caam##len ## _to_cpu(u##len val) \ @@ -154,13 +155,10 @@ static inline u64 rd_reg64(void __iomem *reg) #else /* CONFIG_64BIT */ static inline void wr_reg64(void __iomem *reg, u64 data) { -#ifndef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX - if (caam_little_end) { + if (!caam_imx && caam_little_end) { wr_reg32((u32 __iomem *)(reg) + 1, data >> 32); wr_reg32((u32 __iomem *)(reg), data); - } else -#endif - { + } else { wr_reg32((u32 __iomem *)(reg), data >> 32); wr_reg32((u32 __iomem *)(reg) + 1, data); } @@ -168,41 +166,40 @@ static inline void wr_reg64(void __iomem *reg, u64 data) static inline u64 rd_reg64(void __iomem *reg) { -#ifndef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX - if (caam_little_end) + if (!caam_imx && caam_little_end) return ((u64)rd_reg32((u32 __iomem *)(reg) + 1) << 32 | (u64)rd_reg32((u32 __iomem *)(reg))); - else -#endif - return ((u64)rd_reg32((u32 __iomem *)(reg)) << 32 | - (u64)rd_reg32((u32 __iomem *)(reg) + 1)); + + return ((u64)rd_reg32((u32 __iomem *)(reg)) << 32 | + (u64)rd_reg32((u32 __iomem *)(reg) + 1)); } #endif /* CONFIG_64BIT */ +static inline u64 cpu_to_caam_dma64(dma_addr_t value) +{ + if (caam_imx) + return (((u64)cpu_to_caam32(lower_32_bits(value)) << 32) | + (u64)cpu_to_caam32(upper_32_bits(value))); + + return cpu_to_caam64(value); +} + +static inline u64 caam_dma64_to_cpu(u64 value) +{ + if (caam_imx) + return (((u64)caam32_to_cpu(lower_32_bits(value)) << 32) | + (u64)caam32_to_cpu(upper_32_bits(value))); + + return caam64_to_cpu(value); +} + #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT -#ifdef CONFIG_SOC_IMX7D -#define cpu_to_caam_dma(value) \ - (((u64)cpu_to_caam32(lower_32_bits(value)) << 32) | \ - (u64)cpu_to_caam32(upper_32_bits(value))) -#define caam_dma_to_cpu(value) \ - (((u64)caam32_to_cpu(lower_32_bits(value)) << 32) | \ - (u64)caam32_to_cpu(upper_32_bits(value))) -#else -#define cpu_to_caam_dma(value) cpu_to_caam64(value) -#define caam_dma_to_cpu(value) caam64_to_cpu(value) -#endif /* CONFIG_SOC_IMX7D */ +#define cpu_to_caam_dma(value) cpu_to_caam_dma64(value) +#define caam_dma_to_cpu(value) caam_dma64_to_cpu(value) #else #define cpu_to_caam_dma(value) cpu_to_caam32(value) #define caam_dma_to_cpu(value) caam32_to_cpu(value) -#endif /* CONFIG_ARCH_DMA_ADDR_T_64BIT */ - -#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX -#define cpu_to_caam_dma64(value) \ - (((u64)cpu_to_caam32(lower_32_bits(value)) << 32) | \ - (u64)cpu_to_caam32(upper_32_bits(value))) -#else -#define cpu_to_caam_dma64(value) cpu_to_caam64(value) -#endif +#endif /* CONFIG_ARCH_DMA_ADDR_T_64BIT */ /* * jr_outentry From e117765a117da3ece15689cb8a759d16c415b08c Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Wed, 30 Aug 2017 09:17:39 +0200 Subject: [PATCH 158/279] crypto: af_alg - update correct dst SGL entry When two adjacent TX SGL are processed and parts of both TX SGLs are pulled into the per-request TX SGL, the wrong per-request TX SGL entries were updated. This fixes a NULL pointer dereference when a cipher implementation walks the TX SGL where some of the SGL entries were NULL. Fixes: e870456d8e7c ("crypto: algif_skcipher - overhaul memory...") Signed-off-by: Stephan Mueller Signed-off-by: Herbert Xu --- crypto/af_alg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crypto/af_alg.c b/crypto/af_alg.c index ffa9f4ccd9b4..337cf382718e 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -619,14 +619,14 @@ void af_alg_pull_tsgl(struct sock *sk, size_t used, struct scatterlist *dst, struct af_alg_ctx *ctx = ask->private; struct af_alg_tsgl *sgl; struct scatterlist *sg; - unsigned int i, j; + unsigned int i, j = 0; while (!list_empty(&ctx->tsgl_list)) { sgl = list_first_entry(&ctx->tsgl_list, struct af_alg_tsgl, list); sg = sgl->sg; - for (i = 0, j = 0; i < sgl->cur; i++) { + for (i = 0; i < sgl->cur; i++) { size_t plen = min_t(size_t, used, sg[i].length); struct page *page = sg_page(sg + i); From 8afafa6fba7809c0785018b77c95b19e58b35b94 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Fri, 15 Sep 2017 15:38:21 +0530 Subject: [PATCH 159/279] powerpc/kprobes: Update optprobes to use emulate_update_regs() Optprobes depended on an updated regs->nip from analyse_instr() to identify the location to branch back from the optprobes trampoline. However, since commit 3cdfcbfd32b9d ("powerpc: Change analyse_instr so it doesn't modify *regs"), analyse_instr() doesn't update the registers anymore. Due to this, we end up branching back from the optprobes trampoline to the same branch into the trampoline resulting in a loop. Fix this by calling out to emulate_update_regs() before using the nip. Additionally, explicitly compare the return value from analyse_instr() to 1, rather than just checking for !0 so as to guard against any future changes to analyse_instr() that may result in -1 being returned in more scenarios. Fixes: 3cdfcbfd32b9d ("powerpc: Change analyse_instr so it doesn't modify *regs") Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/optprobes.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index 6f8273f5e988..91e037ab20a1 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -104,8 +104,10 @@ static unsigned long can_optimize(struct kprobe *p) * and that can be emulated. */ if (!is_conditional_branch(*p->ainsn.insn) && - analyse_instr(&op, ®s, *p->ainsn.insn)) + analyse_instr(&op, ®s, *p->ainsn.insn) == 1) { + emulate_update_regs(®s, &op); nip = regs.nip; + } return nip; } From 1b25fda0533462c9cee3a22e8a7bea68fa670af2 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 19 Sep 2017 12:52:22 +0200 Subject: [PATCH 160/279] s390/topology: alternative topology for topology-less machines If running on machines that do not provide topology information we currently generate a "fake" topology which defines the maximum distance between each cpu: each cpu will be put into an own drawer. Historically this used to be the best option for (virtual) machines in overcommited hypervisors. For some workloads however it is better to generate a different topology where all cpus are siblings within a package (all cpus are core siblings). This shows performance improvements of up to 10%, depending on the workload. In order to keep the current behaviour, but also allow to switch to the different core sibling topology use the existing "topology=" kernel parameter: Specifying "topology=on" on machines without topology information will generate the core siblings (fake) topology information, instead of the default topology information where all cpus have the maximum distance. On machines which provide topology information specifying "topology=on" does not have any effect. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/early.c | 12 ------- arch/s390/kernel/topology.c | 72 +++++++++++++++++++++++++++++++------ 2 files changed, 61 insertions(+), 23 deletions(-) diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index ca8cd80e8feb..60181caf8e8a 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -404,18 +404,6 @@ static inline void save_vector_registers(void) #endif } -static int __init topology_setup(char *str) -{ - bool enabled; - int rc; - - rc = kstrtobool(str, &enabled); - if (!rc && !enabled) - S390_lowcore.machine_flags &= ~MACHINE_FLAG_TOPOLOGY; - return rc; -} -early_param("topology", topology_setup); - static int __init disable_vector_extension(char *str) { S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX; diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index bb47c92476f0..a0ce9c83f589 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -29,12 +29,20 @@ #define PTF_VERTICAL (1UL) #define PTF_CHECK (2UL) +enum { + TOPOLOGY_MODE_HW, + TOPOLOGY_MODE_SINGLE, + TOPOLOGY_MODE_PACKAGE, + TOPOLOGY_MODE_UNINITIALIZED +}; + struct mask_info { struct mask_info *next; unsigned char id; cpumask_t mask; }; +static int topology_mode = TOPOLOGY_MODE_UNINITIALIZED; static void set_topology_timer(void); static void topology_work_fn(struct work_struct *work); static struct sysinfo_15_1_x *tl_info; @@ -59,11 +67,26 @@ static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu) cpumask_t mask; cpumask_copy(&mask, cpumask_of(cpu)); - if (!MACHINE_HAS_TOPOLOGY) - return mask; - for (; info; info = info->next) { - if (cpumask_test_cpu(cpu, &info->mask)) - return info->mask; + switch (topology_mode) { + case TOPOLOGY_MODE_HW: + while (info) { + if (cpumask_test_cpu(cpu, &info->mask)) { + mask = info->mask; + break; + } + info = info->next; + } + if (cpumask_empty(&mask)) + cpumask_copy(&mask, cpumask_of(cpu)); + break; + case TOPOLOGY_MODE_PACKAGE: + cpumask_copy(&mask, cpu_present_mask); + break; + default: + /* fallthrough */ + case TOPOLOGY_MODE_SINGLE: + cpumask_copy(&mask, cpumask_of(cpu)); + break; } return mask; } @@ -74,7 +97,7 @@ static cpumask_t cpu_thread_map(unsigned int cpu) int i; cpumask_copy(&mask, cpumask_of(cpu)); - if (!MACHINE_HAS_TOPOLOGY) + if (topology_mode != TOPOLOGY_MODE_HW) return mask; cpu -= cpu % (smp_cpu_mtid + 1); for (i = 0; i <= smp_cpu_mtid; i++) @@ -223,7 +246,7 @@ int topology_set_cpu_management(int fc) static void update_cpu_masks(void) { struct cpu_topology_s390 *topo; - int cpu; + int cpu, id; for_each_possible_cpu(cpu) { topo = &cpu_topology[cpu]; @@ -231,12 +254,13 @@ static void update_cpu_masks(void) topo->core_mask = cpu_group_map(&socket_info, cpu); topo->book_mask = cpu_group_map(&book_info, cpu); topo->drawer_mask = cpu_group_map(&drawer_info, cpu); - if (!MACHINE_HAS_TOPOLOGY) { + if (topology_mode != TOPOLOGY_MODE_HW) { + id = topology_mode == TOPOLOGY_MODE_PACKAGE ? 0 : cpu; topo->thread_id = cpu; topo->core_id = cpu; - topo->socket_id = cpu; - topo->book_id = cpu; - topo->drawer_id = cpu; + topo->socket_id = id; + topo->book_id = id; + topo->drawer_id = id; if (cpu_present(cpu)) cpumask_set_cpu(cpu, &cpus_with_topology); } @@ -459,6 +483,12 @@ void __init topology_init_early(void) struct sysinfo_15_1_x *info; set_sched_topology(s390_topology); + if (topology_mode == TOPOLOGY_MODE_UNINITIALIZED) { + if (MACHINE_HAS_TOPOLOGY) + topology_mode = TOPOLOGY_MODE_HW; + else + topology_mode = TOPOLOGY_MODE_SINGLE; + } if (!MACHINE_HAS_TOPOLOGY) goto out; tl_info = memblock_virt_alloc(PAGE_SIZE, PAGE_SIZE); @@ -474,6 +504,26 @@ out: __arch_update_cpu_topology(); } +static inline int topology_get_mode(int enabled) +{ + if (!enabled) + return TOPOLOGY_MODE_SINGLE; + return MACHINE_HAS_TOPOLOGY ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE; +} + +static int __init topology_setup(char *str) +{ + bool enabled; + int rc; + + rc = kstrtobool(str, &enabled); + if (rc) + return rc; + topology_mode = topology_get_mode(enabled); + return 0; +} +early_param("topology", topology_setup); + static int __init topology_init(void) { if (MACHINE_HAS_TOPOLOGY) From 51dce3867c6c63c7500332e5448c2ba76808d6b5 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 14 Sep 2017 14:42:32 +0200 Subject: [PATCH 161/279] s390/topology: enable / disable topology dynamically Add a new sysctl file /proc/sys/s390/topology which displays if topology is on (1) or off (0) as specified by the "topology=" kernel parameter. This allows to change topology information during runtime and configuring it via /etc/sysctl.conf instead of using the kernel line parameter. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/topology.c | 76 ++++++++++++++++++++++++++++++++++++- 1 file changed, 74 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index a0ce9c83f589..ed0bdd220e1a 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -8,6 +8,8 @@ #include #include +#include +#include #include #include #include @@ -207,10 +209,8 @@ static void topology_update_polarization_simple(void) { int cpu; - mutex_lock(&smp_cpu_state_mutex); for_each_possible_cpu(cpu) smp_cpu_set_polarization(cpu, POLARIZATION_HRZ); - mutex_unlock(&smp_cpu_state_mutex); } static int ptf(unsigned long fc) @@ -278,6 +278,7 @@ static int __arch_update_cpu_topology(void) struct sysinfo_15_1_x *info = tl_info; int rc = 0; + mutex_lock(&smp_cpu_state_mutex); cpumask_clear(&cpus_with_topology); if (MACHINE_HAS_TOPOLOGY) { rc = 1; @@ -287,6 +288,7 @@ static int __arch_update_cpu_topology(void) update_cpu_masks(); if (!MACHINE_HAS_TOPOLOGY) topology_update_polarization_simple(); + mutex_unlock(&smp_cpu_state_mutex); return rc; } @@ -313,6 +315,11 @@ void topology_schedule_update(void) schedule_work(&topology_work); } +static void topology_flush_work(void) +{ + flush_work(&topology_work); +} + static void topology_timer_fn(unsigned long ignored) { if (ptf(PTF_CHECK)) @@ -511,6 +518,11 @@ static inline int topology_get_mode(int enabled) return MACHINE_HAS_TOPOLOGY ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE; } +static inline int topology_is_enabled(void) +{ + return topology_mode != TOPOLOGY_MODE_SINGLE; +} + static int __init topology_setup(char *str) { bool enabled; @@ -524,12 +536,72 @@ static int __init topology_setup(char *str) } early_param("topology", topology_setup); +static int topology_ctl_handler(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + unsigned int len; + int new_mode; + char buf[2]; + + if (!*lenp || *ppos) { + *lenp = 0; + return 0; + } + if (!write) { + strncpy(buf, topology_is_enabled() ? "1\n" : "0\n", + ARRAY_SIZE(buf)); + len = strnlen(buf, ARRAY_SIZE(buf)); + if (len > *lenp) + len = *lenp; + if (copy_to_user(buffer, buf, len)) + return -EFAULT; + goto out; + } + len = *lenp; + if (copy_from_user(buf, buffer, len > sizeof(buf) ? sizeof(buf) : len)) + return -EFAULT; + if (buf[0] != '0' && buf[0] != '1') + return -EINVAL; + mutex_lock(&smp_cpu_state_mutex); + new_mode = topology_get_mode(buf[0] == '1'); + if (topology_mode != new_mode) { + topology_mode = new_mode; + topology_schedule_update(); + } + mutex_unlock(&smp_cpu_state_mutex); + topology_flush_work(); +out: + *lenp = len; + *ppos += len; + return 0; +} + +static struct ctl_table topology_ctl_table[] = { + { + .procname = "topology", + .mode = 0644, + .proc_handler = topology_ctl_handler, + }, + { }, +}; + +static struct ctl_table topology_dir_table[] = { + { + .procname = "s390", + .maxlen = 0, + .mode = 0555, + .child = topology_ctl_table, + }, + { }, +}; + static int __init topology_init(void) { if (MACHINE_HAS_TOPOLOGY) set_topology_timer(); else topology_update_polarization_simple(); + register_sysctl_table(topology_dir_table); return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching); } device_initcall(topology_init); From 9e09007486c883de4aa78a384e1d54c3fe6e42d5 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 18 Sep 2017 18:12:36 +0900 Subject: [PATCH 162/279] kbuild: rpm-pkg: delete firmware_install to fix build error Commit 5620a0d1aacd ("firmware: delete in-kernel firmware") deleted in-kernel firmware support, including "make firmware_install". Since then, "make rpm-pkg" / "make binrpm-pkg" fails to build with the error: make[2]: *** No rule to make target `firmware_install'. Stop. Commit df85b2d767aa ("firmware: Restore support for built-in firmware") restored the build infrastructure for CONFIG_EXTRA_FIRMWARE, but this is out of the scope of "make firmware_install". So, the right thing to do is to kill the use of "make firmware_install". Fixes: 5620a0d1aacd ("firmware: delete in-kernel firmware") Signed-off-by: Masahiro Yamada Acked-by: Greg Kroah-Hartman --- scripts/package/mkspec | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/scripts/package/mkspec b/scripts/package/mkspec index bb43f153fd8e..bf3e9abb2846 100755 --- a/scripts/package/mkspec +++ b/scripts/package/mkspec @@ -88,11 +88,8 @@ echo 'mkdir -p $RPM_BUILD_ROOT/boot/efi $RPM_BUILD_ROOT/lib/modules' echo "%else" echo 'mkdir -p $RPM_BUILD_ROOT/boot $RPM_BUILD_ROOT/lib/modules' echo "%endif" -echo 'mkdir -p $RPM_BUILD_ROOT'"/lib/firmware/$KERNELRELEASE" -echo 'INSTALL_MOD_PATH=$RPM_BUILD_ROOT make %{?_smp_mflags} KBUILD_SRC= mod-fw= modules_install' -echo 'INSTALL_FW_PATH=$RPM_BUILD_ROOT'"/lib/firmware/$KERNELRELEASE" -echo 'make INSTALL_FW_PATH=$INSTALL_FW_PATH' firmware_install +echo 'INSTALL_MOD_PATH=$RPM_BUILD_ROOT make %{?_smp_mflags} KBUILD_SRC= modules_install' echo "%ifarch ia64" echo 'cp $KBUILD_IMAGE $RPM_BUILD_ROOT'"/boot/efi/vmlinuz-$KERNELRELEASE" echo 'ln -s '"efi/vmlinuz-$KERNELRELEASE" '$RPM_BUILD_ROOT'"/boot/" @@ -119,7 +116,7 @@ if ! $PREBUILT; then echo 'rm -f $RPM_BUILD_ROOT'"/lib/modules/$KERNELRELEASE/build" echo 'rm -f $RPM_BUILD_ROOT'"/lib/modules/$KERNELRELEASE/source" echo "mkdir -p "'$RPM_BUILD_ROOT'"/usr/src/kernels/$KERNELRELEASE" -echo "EXCLUDES=\"$RCS_TAR_IGNORE --exclude .tmp_versions --exclude=*vmlinux* --exclude=*.o --exclude=*.ko --exclude=*.cmd --exclude=Documentation --exclude=firmware --exclude .config.old --exclude .missing-syscalls.d\"" +echo "EXCLUDES=\"$RCS_TAR_IGNORE --exclude .tmp_versions --exclude=*vmlinux* --exclude=*.o --exclude=*.ko --exclude=*.cmd --exclude=Documentation --exclude .config.old --exclude .missing-syscalls.d\"" echo "tar "'$EXCLUDES'" -cf- . | (cd "'$RPM_BUILD_ROOT'"/usr/src/kernels/$KERNELRELEASE;tar xvf -)" echo 'cd $RPM_BUILD_ROOT'"/lib/modules/$KERNELRELEASE" echo "ln -sf /usr/src/kernels/$KERNELRELEASE build" @@ -154,7 +151,6 @@ echo '%defattr (-, root, root)' echo "/lib/modules/$KERNELRELEASE" echo "%exclude /lib/modules/$KERNELRELEASE/build" echo "%exclude /lib/modules/$KERNELRELEASE/source" -echo "/lib/firmware/$KERNELRELEASE" echo "/boot/*" echo "" echo "%files headers" From cc18abbe449aafc013831a8e0440afc336ae1cba Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 18 Sep 2017 18:22:19 +0900 Subject: [PATCH 163/279] kbuild: deb-pkg: remove firmware package support Commit 5620a0d1aacd ("firmware: delete in-kernel firmware") deleted in-kernel firmware support, including the firmware install command. So, the firmware package does not make sense any more. Remove it. Signed-off-by: Masahiro Yamada Reviewed-by: Riku Voipio Acked-by: Greg Kroah-Hartman --- scripts/package/builddeb | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/scripts/package/builddeb b/scripts/package/builddeb index aad67000e4dd..0bc87473f68f 100755 --- a/scripts/package/builddeb +++ b/scripts/package/builddeb @@ -92,12 +92,10 @@ else fi sourcename=$KDEB_SOURCENAME tmpdir="$objtree/debian/tmp" -fwdir="$objtree/debian/fwtmp" kernel_headers_dir="$objtree/debian/hdrtmp" libc_headers_dir="$objtree/debian/headertmp" dbg_dir="$objtree/debian/dbgtmp" packagename=linux-image-$version -fwpackagename=linux-firmware-image-$version kernel_headers_packagename=linux-headers-$version libc_headers_packagename=linux-libc-dev dbg_packagename=$packagename-dbg @@ -126,10 +124,9 @@ esac BUILD_DEBUG="$(grep -s '^CONFIG_DEBUG_INFO=y' $KCONFIG_CONFIG || true)" # Setup the directory structure -rm -rf "$tmpdir" "$fwdir" "$kernel_headers_dir" "$libc_headers_dir" "$dbg_dir" $objtree/debian/files +rm -rf "$tmpdir" "$kernel_headers_dir" "$libc_headers_dir" "$dbg_dir" $objtree/debian/files mkdir -m 755 -p "$tmpdir/DEBIAN" mkdir -p "$tmpdir/lib" "$tmpdir/boot" -mkdir -p "$fwdir/lib/firmware/$version/" mkdir -p "$kernel_headers_dir/lib/modules/$version/" # Build and install the kernel @@ -306,7 +303,6 @@ else cat <> debian/control Package: $packagename -Suggests: $fwpackagename Architecture: any Description: Linux kernel, version $version This package contains the Linux kernel, modules and corresponding other @@ -345,22 +341,6 @@ Description: Linux kernel headers for $KERNELRELEASE on \${kernel:debarch} This is useful for people who need to build external modules EOF -# Do we have firmware? Move it out of the way and build it into a package. -if [ -e "$tmpdir/lib/firmware" ]; then - mv "$tmpdir/lib/firmware"/* "$fwdir/lib/firmware/$version/" - rmdir "$tmpdir/lib/firmware" - - cat <> debian/control - -Package: $fwpackagename -Architecture: all -Description: Linux kernel firmware, version $version - This package contains firmware from the Linux kernel, version $version. -EOF - - create_package "$fwpackagename" "$fwdir" -fi - cat <> debian/control Package: $libc_headers_packagename From 25b080bd53f2873785fa049f570ac1b361c11d72 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 20 Sep 2017 22:01:26 +0900 Subject: [PATCH 164/279] kbuild: rpm-pkg: fix version number handling The "Release:" field of the spec file is determined based on the .version file. However, the .version file is not copied to the source tar file. So, when we build the kernel from the source package, the UTS_VERSION always indicates #1. This does not match with "rpm -q". The kernel UTS_VERSION and "rpm -q" do not agree for binrpm-pkg, either. Please note the kernel has already been built before the spec file is created. Currently, mkspec invokes mkversion. This script returns an incremented version. So, the "Release:" field of the spec file is greater than the version in the kernel by one. For the source package build (where .version file is missing), we can give KBUILD_BUILD_VERSION=%{release} to the build command. For the binary package build, we can simply read out the .version file because it contains the version number that was used for building the kernel image. We can remove scripts/mkversion because scripts/package/Makefile need not touch the .version file. Signed-off-by: Masahiro Yamada --- scripts/mkversion | 6 ------ scripts/package/Makefile | 5 ----- scripts/package/mkspec | 6 ++---- 3 files changed, 2 insertions(+), 15 deletions(-) delete mode 100644 scripts/mkversion diff --git a/scripts/mkversion b/scripts/mkversion deleted file mode 100644 index c12addc9c7ef..000000000000 --- a/scripts/mkversion +++ /dev/null @@ -1,6 +0,0 @@ -if [ ! -f .version ] -then - echo 1 -else - expr 0`cat .version` + 1 -fi diff --git a/scripts/package/Makefile b/scripts/package/Makefile index 71b4a8af9d4d..73f9f3192b9f 100644 --- a/scripts/package/Makefile +++ b/scripts/package/Makefile @@ -50,8 +50,6 @@ rpm-pkg rpm: FORCE $(MAKE) clean $(CONFIG_SHELL) $(MKSPEC) >$(objtree)/kernel.spec $(call cmd,src_tar,$(KERNELPATH),kernel.spec) - $(CONFIG_SHELL) $(srctree)/scripts/mkversion > $(objtree)/.tmp_version - mv -f $(objtree)/.tmp_version $(objtree)/.version rpmbuild $(RPMOPTS) --target $(UTS_MACHINE) -ta $(KERNELPATH).tar.gz rm $(KERNELPATH).tar.gz kernel.spec @@ -60,9 +58,6 @@ rpm-pkg rpm: FORCE binrpm-pkg: FORCE $(MAKE) KBUILD_SRC= $(CONFIG_SHELL) $(MKSPEC) prebuilt > $(objtree)/binkernel.spec - $(CONFIG_SHELL) $(srctree)/scripts/mkversion > $(objtree)/.tmp_version - mv -f $(objtree)/.tmp_version $(objtree)/.version - rpmbuild $(RPMOPTS) --define "_builddir $(objtree)" --target \ $(UTS_MACHINE) -bb $(objtree)/binkernel.spec rm binkernel.spec diff --git a/scripts/package/mkspec b/scripts/package/mkspec index bf3e9abb2846..f47f17aae135 100755 --- a/scripts/package/mkspec +++ b/scripts/package/mkspec @@ -27,9 +27,7 @@ __KERNELRELEASE=`echo $KERNELRELEASE | sed -e "s/-/_/g"` echo "Name: kernel" echo "Summary: The Linux Kernel" echo "Version: $__KERNELRELEASE" -# we need to determine the NEXT version number so that uname and -# rpm -q will agree -echo "Release: `. $srctree/scripts/mkversion`" +echo "Release: $(cat .version 2>/dev/null || echo 1)" echo "License: GPL" echo "Group: System Environment/Kernel" echo "Vendor: The Linux Community" @@ -77,7 +75,7 @@ fi echo "%build" if ! $PREBUILT; then -echo "make clean && make %{?_smp_mflags}" +echo "make clean && make %{?_smp_mflags} KBUILD_BUILD_VERSION=%{release}" echo "" fi From 749aaf3372b8b56b8743c3e27700d64c8bd06921 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Wed, 20 Sep 2017 13:56:06 -0500 Subject: [PATCH 165/279] PCI: endpoint: Use correct "end of test" interrupt pci_epf_test_raise_irq() reads the interrupt to use for the response from reg->command, but this has been cleared at the beginning of the command handler so the value is always zero at this point. Instead, extract the interrupt index before handling the command and then pass the requested interrupt into pci_epf_test_raise_irq(). This allows us to remove the specific code to extract the interrupt for COMMAND_RAISE_MSI_IRQ since it is now handled in common code. Fixes: 3ecf3232c54c ("PCI: endpoint: Do not reset *command* inadvertently") Signed-off-by: John Keeping Signed-off-by: Bjorn Helgaas Acked-by: Kishon Vijay Abraham I --- drivers/pci/endpoint/functions/pci-epf-test.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c index 4ddc6e8f9fe7..f9308c2f22e6 100644 --- a/drivers/pci/endpoint/functions/pci-epf-test.c +++ b/drivers/pci/endpoint/functions/pci-epf-test.c @@ -251,9 +251,8 @@ err: return ret; } -static void pci_epf_test_raise_irq(struct pci_epf_test *epf_test) +static void pci_epf_test_raise_irq(struct pci_epf_test *epf_test, u8 irq) { - u8 irq; u8 msi_count; struct pci_epf *epf = epf_test->epf; struct pci_epc *epc = epf->epc; @@ -262,7 +261,6 @@ static void pci_epf_test_raise_irq(struct pci_epf_test *epf_test) reg->status |= STATUS_IRQ_RAISED; msi_count = pci_epc_get_msi(epc); - irq = (reg->command & MSI_NUMBER_MASK) >> MSI_NUMBER_SHIFT; if (irq > msi_count || msi_count <= 0) pci_epc_raise_irq(epc, PCI_EPC_IRQ_LEGACY, 0); else @@ -289,6 +287,8 @@ static void pci_epf_test_cmd_handler(struct work_struct *work) reg->command = 0; reg->status = 0; + irq = (command & MSI_NUMBER_MASK) >> MSI_NUMBER_SHIFT; + if (command & COMMAND_RAISE_LEGACY_IRQ) { reg->status = STATUS_IRQ_RAISED; pci_epc_raise_irq(epc, PCI_EPC_IRQ_LEGACY, 0); @@ -301,7 +301,7 @@ static void pci_epf_test_cmd_handler(struct work_struct *work) reg->status |= STATUS_WRITE_FAIL; else reg->status |= STATUS_WRITE_SUCCESS; - pci_epf_test_raise_irq(epf_test); + pci_epf_test_raise_irq(epf_test, irq); goto reset_handler; } @@ -311,7 +311,7 @@ static void pci_epf_test_cmd_handler(struct work_struct *work) reg->status |= STATUS_READ_SUCCESS; else reg->status |= STATUS_READ_FAIL; - pci_epf_test_raise_irq(epf_test); + pci_epf_test_raise_irq(epf_test, irq); goto reset_handler; } @@ -321,13 +321,12 @@ static void pci_epf_test_cmd_handler(struct work_struct *work) reg->status |= STATUS_COPY_SUCCESS; else reg->status |= STATUS_COPY_FAIL; - pci_epf_test_raise_irq(epf_test); + pci_epf_test_raise_irq(epf_test, irq); goto reset_handler; } if (command & COMMAND_RAISE_MSI_IRQ) { msi_count = pci_epc_get_msi(epc); - irq = (command & MSI_NUMBER_MASK) >> MSI_NUMBER_SHIFT; if (irq > msi_count || msi_count <= 0) goto reset_handler; reg->status = STATUS_IRQ_RAISED; From 008ba2a13f2d04c947adc536d19debb8fe66f110 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 14 Sep 2017 17:14:41 -0400 Subject: [PATCH 166/279] packet: hold bind lock when rebinding to fanout hook Packet socket bind operations must hold the po->bind_lock. This keeps po->running consistent with whether the socket is actually on a ptype list to receive packets. fanout_add unbinds a socket and its packet_rcv/tpacket_rcv call, then binds the fanout object to receive through packet_rcv_fanout. Make it hold the po->bind_lock when testing po->running and rebinding. Else, it can race with other rebind operations, such as that in packet_set_ring from packet_rcv to tpacket_rcv. Concurrent updates can result in a socket being added to a fanout group twice, causing use-after-free KASAN bug reports, among others. Reported independently by both trinity and syzkaller. Verified that the syzkaller reproducer passes after this patch. Fixes: dc99f600698d ("packet: Add fanout support.") Reported-by: nixioaming Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/packet/af_packet.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c26172995511..d288f52c53f7 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1684,10 +1684,6 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) mutex_lock(&fanout_mutex); - err = -EINVAL; - if (!po->running) - goto out; - err = -EALREADY; if (po->fanout) goto out; @@ -1749,7 +1745,10 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) list_add(&match->list, &fanout_list); } err = -EINVAL; - if (match->type == type && + + spin_lock(&po->bind_lock); + if (po->running && + match->type == type && match->prot_hook.type == po->prot_hook.type && match->prot_hook.dev == po->prot_hook.dev) { err = -ENOSPC; @@ -1761,6 +1760,13 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) err = 0; } } + spin_unlock(&po->bind_lock); + + if (err && !refcount_read(&match->sk_ref)) { + list_del(&match->list); + kfree(match); + } + out: if (err && rollover) { kfree(rollover); From ec9dd352d591f0c90402ec67a317c1ed4fb2e638 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 18 Sep 2017 16:38:36 -0700 Subject: [PATCH 167/279] bpf: one perf event close won't free bpf program attached by another perf event This patch fixes a bug exhibited by the following scenario: 1. fd1 = perf_event_open with attr.config = ID1 2. attach bpf program prog1 to fd1 3. fd2 = perf_event_open with attr.config = ID1 4. user program closes fd2 and prog1 is detached from the tracepoint. 5. user program with fd1 does not work properly as tracepoint no output any more. The issue happens at step 4. Multiple perf_event_open can be called successfully, but only one bpf prog pointer in the tp_event. In the current logic, any fd release for the same tp_event will free the tp_event->prog. The fix is to free tp_event->prog only when the closing fd corresponds to the one which registered the program. Signed-off-by: Yonghong Song Signed-off-by: David S. Miller --- include/linux/trace_events.h | 1 + kernel/events/core.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 7f11050746ae..2e0f22298fe9 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -272,6 +272,7 @@ struct trace_event_call { int perf_refcount; struct hlist_head __percpu *perf_events; struct bpf_prog *prog; + struct perf_event *bpf_prog_owner; int (*perf_perm)(struct trace_event_call *, struct perf_event *); diff --git a/kernel/events/core.c b/kernel/events/core.c index 3e691b75b2db..6bc21e202ae4 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8171,6 +8171,7 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) } } event->tp_event->prog = prog; + event->tp_event->bpf_prog_owner = event; return 0; } @@ -8185,7 +8186,7 @@ static void perf_event_free_bpf_prog(struct perf_event *event) return; prog = event->tp_event->prog; - if (prog) { + if (prog && event->tp_event->bpf_prog_owner == event) { event->tp_event->prog = NULL; bpf_prog_put(prog); } From 1fa089ec6d68849bfe60abd141375dc9b21b0ee8 Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 20 Sep 2017 16:46:49 -0500 Subject: [PATCH 168/279] [SMB3] Update session and share information displayed for debugging SMB2/SMB3 We were not displaying some key fields (session status and capabilities and whether guest authenticated) for SMB2/SMB3 session in /proc/fs/cifs/DebugData. This is needed for real world triage of problems with the (now much more common) SMB3 mounts. Signed-off-by: Steve French --- fs/cifs/cifs_debug.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 9727e1dcacd5..cbb9534b89b4 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -160,8 +160,13 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) if ((ses->serverDomain == NULL) || (ses->serverOS == NULL) || (ses->serverNOS == NULL)) { - seq_printf(m, "\n%d) entry for %s not fully " - "displayed\n\t", i, ses->serverName); + seq_printf(m, "\n%d) Name: %s Uses: %d Capability: 0x%x\tSession Status: %d\t", + i, ses->serverName, ses->ses_count, + ses->capabilities, ses->status); + if (ses->session_flags & SMB2_SESSION_FLAG_IS_GUEST) + seq_printf(m, "Guest\t"); + else if (ses->session_flags & SMB2_SESSION_FLAG_IS_NULL) + seq_printf(m, "Anonymous\t"); } else { seq_printf(m, "\n%d) Name: %s Domain: %s Uses: %d OS:" From c2a64bb9fcd31c39feddf30748b4ee8d82e53c6a Mon Sep 17 00:00:00 2001 From: Meng Xu Date: Tue, 19 Sep 2017 13:19:13 -0400 Subject: [PATCH 169/279] net: compat: assert the size of cmsg copied in is as expected The actual length of cmsg fetched in during the second loop (i.e., kcmsg - kcmsg_base) could be different from what we get from the first loop (i.e., kcmlen). The main reason is that the two get_user() calls in the two loops (i.e., get_user(ucmlen, &ucmsg->cmsg_len) and __get_user(ucmlen, &ucmsg->cmsg_len)) could cause ucmlen to have different values even they fetch from the same userspace address, as user can race to change the memory content in &ucmsg->cmsg_len across fetches. Although in the second loop, the sanity check if ((char *)kcmsg_base + kcmlen - (char *)kcmsg < CMSG_ALIGN(tmp)) is inplace, it only ensures that the cmsg fetched in during the second loop does not exceed the length of kcmlen, but not necessarily equal to kcmlen. But indicated by the assignment kmsg->msg_controllen = kcmlen, we should enforce that. This patch adds this additional sanity check and ensures that what is recorded in kmsg->msg_controllen is the actual cmsg length. Signed-off-by: Meng Xu Signed-off-by: David S. Miller --- net/compat.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/compat.c b/net/compat.c index 6ded6c821d7a..22381719718c 100644 --- a/net/compat.c +++ b/net/compat.c @@ -185,6 +185,13 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk, ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, ucmlen); } + /* + * check the length of messages copied in is the same as the + * what we get from the first loop + */ + if ((char *)kcmsg - (char *)kcmsg_base != kcmlen) + goto Einval; + /* Ok, looks like we made it. Hook it up and return success. */ kmsg->msg_control = kcmsg_base; kmsg->msg_controllen = kcmlen; From 92dd5452c1be873a1193561f4f691763103d22ac Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Tue, 19 Sep 2017 18:45:56 +0100 Subject: [PATCH 170/279] net: change skb->mac_header when Generic XDP calls adjust_head Since XDP's view of the packet includes the MAC header, moving the start- of-packet with bpf_xdp_adjust_head needs to also update the offset of the MAC header (which is relative to skb->head, not to the skb->data that was changed). Without this, tcpdump sees packets starting from the old MAC header rather than the new one, at least in my tests on the loopback device. Fixes: b5cdae3291f7 ("net: Generic XDP") Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- net/core/dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/dev.c b/net/core/dev.c index fb766d906148..9a2254f9802f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3892,6 +3892,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, __skb_pull(skb, off); else if (off < 0) __skb_push(skb, -off); + skb->mac_header += off; switch (act) { case XDP_REDIRECT: From 5e62d98c4bbc243f3dca18e73a754b629839fc5c Mon Sep 17 00:00:00 2001 From: Troy Kisky Date: Tue, 19 Sep 2017 17:33:07 -0700 Subject: [PATCH 171/279] net: fec: only check queue 0 if RXF_0/TXF_0 interrupt is set Before queue 0 was always checked if any queue caused an interrupt. It is better to just mark queue 0 if queue 0 has caused an interrupt. Signed-off-by: Troy Kisky Acked-by: Fugang Duan Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 56f56d6ada9c..464055fb33d5 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1559,14 +1559,14 @@ fec_enet_collect_events(struct fec_enet_private *fep, uint int_events) if (int_events == 0) return false; - if (int_events & FEC_ENET_RXF) + if (int_events & FEC_ENET_RXF_0) fep->work_rx |= (1 << 2); if (int_events & FEC_ENET_RXF_1) fep->work_rx |= (1 << 0); if (int_events & FEC_ENET_RXF_2) fep->work_rx |= (1 << 1); - if (int_events & FEC_ENET_TXF) + if (int_events & FEC_ENET_TXF_0) fep->work_tx |= (1 << 2); if (int_events & FEC_ENET_TXF_1) fep->work_tx |= (1 << 0); From 7063c163cd4a20184b3bbada503dab8f254a8c56 Mon Sep 17 00:00:00 2001 From: Troy Kisky Date: Tue, 19 Sep 2017 17:33:08 -0700 Subject: [PATCH 172/279] net: fec: remove unused interrupt FEC_ENET_TS_TIMER FEC_ENET_TS_TIMER is not checked in the interrupt routine so there is no need to enable it. Signed-off-by: Troy Kisky Acked-by: Fugang Duan Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index 38c7b21e5d63..ede1876a9a19 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -374,8 +374,8 @@ struct bufdesc_ex { #define FEC_ENET_TS_AVAIL ((uint)0x00010000) #define FEC_ENET_TS_TIMER ((uint)0x00008000) -#define FEC_DEFAULT_IMASK (FEC_ENET_TXF | FEC_ENET_RXF | FEC_ENET_MII | FEC_ENET_TS_TIMER) -#define FEC_NAPI_IMASK (FEC_ENET_MII | FEC_ENET_TS_TIMER) +#define FEC_DEFAULT_IMASK (FEC_ENET_TXF | FEC_ENET_RXF | FEC_ENET_MII) +#define FEC_NAPI_IMASK FEC_ENET_MII #define FEC_RX_DISABLED_IMASK (FEC_DEFAULT_IMASK & (~FEC_ENET_RXF)) /* ENET interrupt coalescing macro define */ From e24ee2780a1d6786b94139ebf95b44c9ae62bf13 Mon Sep 17 00:00:00 2001 From: Troy Kisky Date: Tue, 19 Sep 2017 17:33:09 -0700 Subject: [PATCH 173/279] net: fec: return IRQ_HANDLED if fec_ptp_check_pps_event handled it fec_ptp_check_pps_event will return 1 if FEC_T_TF_MASK caused an interrupt. Don't return IRQ_NONE in this case. Signed-off-by: Troy Kisky Acked-by: Fugang Duan Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 464055fb33d5..3dc2d771a222 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1604,8 +1604,8 @@ fec_enet_interrupt(int irq, void *dev_id) } if (fep->ptp_clock) - fec_ptp_check_pps_event(fep); - + if (fec_ptp_check_pps_event(fep)) + ret = IRQ_HANDLED; return ret; } From 02388bf87f72e1d47174cd8f81c34443920eb5a0 Mon Sep 17 00:00:00 2001 From: Meng Xu Date: Tue, 19 Sep 2017 21:49:55 -0400 Subject: [PATCH 174/279] isdn/i4l: fetch the ppp_write buffer in one shot In isdn_ppp_write(), the header (i.e., protobuf) of the buffer is fetched twice from userspace. The first fetch is used to peek at the protocol of the message and reset the huptimer if necessary; while the second fetch copies in the whole buffer. However, given that buf resides in userspace memory, a user process can race to change its memory content across fetches. By doing so, we can either avoid resetting the huptimer for any type of packets (by first setting proto to PPP_LCP and later change to the actual type) or force resetting the huptimer for LCP packets. This patch changes this double-fetch behavior into two single fetches decided by condition (lp->isdn_device < 0 || lp->isdn_channel <0). A more detailed discussion can be found at https://marc.info/?l=linux-kernel&m=150586376926123&w=2 Signed-off-by: Meng Xu Signed-off-by: David S. Miller --- drivers/isdn/i4l/isdn_ppp.c | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c index 6c44609fd83a..cd2b3c69771a 100644 --- a/drivers/isdn/i4l/isdn_ppp.c +++ b/drivers/isdn/i4l/isdn_ppp.c @@ -825,7 +825,6 @@ isdn_ppp_write(int min, struct file *file, const char __user *buf, int count) isdn_net_local *lp; struct ippp_struct *is; int proto; - unsigned char protobuf[4]; is = file->private_data; @@ -839,24 +838,28 @@ isdn_ppp_write(int min, struct file *file, const char __user *buf, int count) if (!lp) printk(KERN_DEBUG "isdn_ppp_write: lp == NULL\n"); else { - /* - * Don't reset huptimer for - * LCP packets. (Echo requests). - */ - if (copy_from_user(protobuf, buf, 4)) - return -EFAULT; - proto = PPP_PROTOCOL(protobuf); - if (proto != PPP_LCP) - lp->huptimer = 0; + if (lp->isdn_device < 0 || lp->isdn_channel < 0) { + unsigned char protobuf[4]; + /* + * Don't reset huptimer for + * LCP packets. (Echo requests). + */ + if (copy_from_user(protobuf, buf, 4)) + return -EFAULT; + + proto = PPP_PROTOCOL(protobuf); + if (proto != PPP_LCP) + lp->huptimer = 0; - if (lp->isdn_device < 0 || lp->isdn_channel < 0) return 0; + } if ((dev->drv[lp->isdn_device]->flags & DRV_FLAG_RUNNING) && lp->dialstate == 0 && (lp->flags & ISDN_NET_CONNECTED)) { unsigned short hl; struct sk_buff *skb; + unsigned char *cpy_buf; /* * we need to reserve enough space in front of * sk_buff. old call to dev_alloc_skb only reserved @@ -869,11 +872,21 @@ isdn_ppp_write(int min, struct file *file, const char __user *buf, int count) return count; } skb_reserve(skb, hl); - if (copy_from_user(skb_put(skb, count), buf, count)) + cpy_buf = skb_put(skb, count); + if (copy_from_user(cpy_buf, buf, count)) { kfree_skb(skb); return -EFAULT; } + + /* + * Don't reset huptimer for + * LCP packets. (Echo requests). + */ + proto = PPP_PROTOCOL(cpy_buf); + if (proto != PPP_LCP) + lp->huptimer = 0; + if (is->debug & 0x40) { printk(KERN_DEBUG "ppp xmit: len %d\n", (int) skb->len); isdn_ppp_frame_log("xmit", skb->data, skb->len, 32, is->unit, lp->ppp_slot); From e92a0843795779678397ac0790a76de20f79cc13 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Wed, 20 Sep 2017 18:52:50 +0800 Subject: [PATCH 175/279] net: hns3: Cleanup for ROCE capability flag in ae_dev This patch add the ROCE supported flag in the driver_data field of pci_device_id, delete roce_pci_tbl and change HNAE_DEV_SUPPORT_ROCE_B to HNAE3_DEV_SUPPORT_ROCE_B. This cleanup is done in order to support adding capability in pci_device_id and to fix initialization failure when cmd is not supported. Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 5 +++- .../hisilicon/hns3/hns3pf/hclge_main.c | 25 +++---------------- .../hisilicon/hns3/hns3pf/hns3_enet.c | 16 ++++++++---- 3 files changed, 19 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index b2f28ae81273..0f7b61a92f44 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -49,7 +49,10 @@ #define HNAE3_CLASS_NAME_SIZE 16 #define HNAE3_DEV_INITED_B 0x0 -#define HNAE_DEV_SUPPORT_ROCE_B 0x1 +#define HNAE3_DEV_SUPPORT_ROCE_B 0x1 + +#define hnae3_dev_roce_supported(hdev) \ + hnae_get_bit(hdev->ae_dev->flag, HNAE3_DEV_SUPPORT_ROCE_B) #define ring_ptr_move_fw(ring, p) \ ((ring)->p = ((ring)->p + 1) % (ring)->desc_num) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 74008ef23169..6953d19c6475 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -46,17 +46,7 @@ static const struct pci_device_id ae_algo_pci_tbl[] = { {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA), 0}, {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC), 0}, {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC), 0}, - /* Required last entry */ - {0, } -}; - -static const struct pci_device_id roce_pci_tbl[] = { - {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA), 0}, - {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA_MACSEC), 0}, - {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA), 0}, - {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC), 0}, - {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC), 0}, - /* Required last entry */ + /* required last entry */ {0, } }; @@ -894,7 +884,7 @@ static int hclge_query_pf_resource(struct hclge_dev *hdev) hdev->num_tqps = __le16_to_cpu(req->tqp_num); hdev->pkt_buf_size = __le16_to_cpu(req->buf_size) << HCLGE_BUF_UNIT_S; - if (hnae_get_bit(hdev->ae_dev->flag, HNAE_DEV_SUPPORT_ROCE_B)) { + if (hnae3_dev_roce_supported(hdev)) { hdev->num_roce_msix = hnae_get_field(__le16_to_cpu(req->pf_intr_vector_number), HCLGE_PF_VEC_NUM_M, HCLGE_PF_VEC_NUM_S); @@ -3932,8 +3922,7 @@ static int hclge_init_client_instance(struct hnae3_client *client, goto err; if (hdev->roce_client && - hnae_get_bit(hdev->ae_dev->flag, - HNAE_DEV_SUPPORT_ROCE_B)) { + hnae3_dev_roce_supported(hdev)) { struct hnae3_client *rc = hdev->roce_client; ret = hclge_init_roce_base_info(vport); @@ -3956,8 +3945,7 @@ static int hclge_init_client_instance(struct hnae3_client *client, break; case HNAE3_CLIENT_ROCE: - if (hnae_get_bit(hdev->ae_dev->flag, - HNAE_DEV_SUPPORT_ROCE_B)) { + if (hnae3_dev_roce_supported(hdev)) { hdev->roce_client = client; vport->roce.client = client; } @@ -4069,7 +4057,6 @@ static void hclge_pci_uninit(struct hclge_dev *hdev) static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) { struct pci_dev *pdev = ae_dev->pdev; - const struct pci_device_id *id; struct hclge_dev *hdev; int ret; @@ -4084,10 +4071,6 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) hdev->ae_dev = ae_dev; ae_dev->priv = hdev; - id = pci_match_id(roce_pci_tbl, ae_dev->pdev); - if (id) - hnae_set_bit(ae_dev->flag, HNAE_DEV_SUPPORT_ROCE_B, 1); - ret = hclge_pci_init(hdev); if (ret) { dev_err(&pdev->dev, "PCI init failed\n"); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c index 4d68d6ea5143..94d8bb5b92f0 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c @@ -41,11 +41,16 @@ static struct hnae3_client client; static const struct pci_device_id hns3_pci_tbl[] = { {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_GE), 0}, {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE), 0}, - {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA), 0}, - {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA_MACSEC), 0}, - {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA), 0}, - {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC), 0}, - {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC), 0}, + {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA), + BIT(HNAE3_DEV_SUPPORT_ROCE_B)}, + {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA_MACSEC), + BIT(HNAE3_DEV_SUPPORT_ROCE_B)}, + {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA), + BIT(HNAE3_DEV_SUPPORT_ROCE_B)}, + {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC), + BIT(HNAE3_DEV_SUPPORT_ROCE_B)}, + {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC), + BIT(HNAE3_DEV_SUPPORT_ROCE_B)}, /* required last entry */ {0, } }; @@ -1348,6 +1353,7 @@ static int hns3_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } ae_dev->pdev = pdev; + ae_dev->flag = ent->driver_data; ae_dev->dev_type = HNAE3_DEV_KNIC; pci_set_drvdata(pdev, ae_dev); From 2daf4a6536f3109ed0ed758cec14743e0e5c20ea Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Wed, 20 Sep 2017 18:52:51 +0800 Subject: [PATCH 176/279] net: hns3: Fix initialization when cmd is not supported When ae_dev doesn't support DCB, rx_priv_wl_config, common_thrd_config and tm_qs_bp_cfg can't be called, otherwise cmd return fail, which causes the hclge module initialization process to fail. This patch fix it by adding a DCB capability flag to check if the ae_dev support DCB. Fixes: 46a3df9f9718 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support") Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 7 +++++ .../hisilicon/hns3/hns3pf/hclge_main.c | 26 +++++++++++-------- .../ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 4 +++ .../hisilicon/hns3/hns3pf/hns3_enet.c | 10 +++---- 4 files changed, 31 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 0f7b61a92f44..ad685f5aa6d1 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -50,10 +50,17 @@ #define HNAE3_DEV_INITED_B 0x0 #define HNAE3_DEV_SUPPORT_ROCE_B 0x1 +#define HNAE3_DEV_SUPPORT_DCB_B 0x2 + +#define HNAE3_DEV_SUPPORT_ROCE_DCB_BITS (BIT(HNAE3_DEV_SUPPORT_DCB_B) |\ + BIT(HNAE3_DEV_SUPPORT_ROCE_B)) #define hnae3_dev_roce_supported(hdev) \ hnae_get_bit(hdev->ae_dev->flag, HNAE3_DEV_SUPPORT_ROCE_B) +#define hnae3_dev_dcb_supported(hdev) \ + hnae_get_bit(hdev->ae_dev->flag, HNAE3_DEV_SUPPORT_DCB_B) + #define ring_ptr_move_fw(ring, p) \ ((ring)->p = ((ring)->p + 1) % (ring)->desc_num) #define ring_ptr_move_bw(ring, p) \ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 6953d19c6475..903f43a8c2a1 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -1772,18 +1772,22 @@ int hclge_buffer_alloc(struct hclge_dev *hdev) return ret; } - ret = hclge_rx_priv_wl_config(hdev); - if (ret) { - dev_err(&hdev->pdev->dev, - "could not configure rx private waterline %d\n", ret); - return ret; - } + if (hnae3_dev_dcb_supported(hdev)) { + ret = hclge_rx_priv_wl_config(hdev); + if (ret) { + dev_err(&hdev->pdev->dev, + "could not configure rx private waterline %d\n", + ret); + return ret; + } - ret = hclge_common_thrd_config(hdev); - if (ret) { - dev_err(&hdev->pdev->dev, - "could not configure common threshold %d\n", ret); - return ret; + ret = hclge_common_thrd_config(hdev); + if (ret) { + dev_err(&hdev->pdev->dev, + "could not configure common threshold %d\n", + ret); + return ret; + } } ret = hclge_common_wl_config(hdev); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index 1c577d268f00..c91dbf19c4b1 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -976,6 +976,10 @@ int hclge_pause_setup_hw(struct hclge_dev *hdev) if (ret) return ret; + /* Only DCB-supported dev supports qset back pressure setting */ + if (!hnae3_dev_dcb_supported(hdev)) + return 0; + for (i = 0; i < hdev->tm_info.num_tc; i++) { ret = hclge_tm_qs_bp_cfg(hdev, i); if (ret) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c index 94d8bb5b92f0..35369e1c8036 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c @@ -42,15 +42,15 @@ static const struct pci_device_id hns3_pci_tbl[] = { {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_GE), 0}, {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE), 0}, {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA), - BIT(HNAE3_DEV_SUPPORT_ROCE_B)}, + HNAE3_DEV_SUPPORT_ROCE_DCB_BITS}, {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA_MACSEC), - BIT(HNAE3_DEV_SUPPORT_ROCE_B)}, + HNAE3_DEV_SUPPORT_ROCE_DCB_BITS}, {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA), - BIT(HNAE3_DEV_SUPPORT_ROCE_B)}, + HNAE3_DEV_SUPPORT_ROCE_DCB_BITS}, {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC), - BIT(HNAE3_DEV_SUPPORT_ROCE_B)}, + HNAE3_DEV_SUPPORT_ROCE_DCB_BITS}, {PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC), - BIT(HNAE3_DEV_SUPPORT_ROCE_B)}, + HNAE3_DEV_SUPPORT_ROCE_DCB_BITS}, /* required last entry */ {0, } }; From d221df4e0faae2b9cc8ad78f3e5e777461b6b542 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Wed, 20 Sep 2017 18:52:52 +0800 Subject: [PATCH 177/279] net: hns3: Fix for DEFAULT_DV when dev doesn't support DCB When ae_dev doesn't support DCB, DEFAULT_DV must be set to a lower value, otherwise the buffer allocation process will fail. This patch fix it by setting it to 30K bytes. Fixes: 46a3df9f9718 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support") Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h | 1 + drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index c2b613b40509..30e2ad5ac0da 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -688,6 +688,7 @@ struct hclge_reset_tqp_queue { #define HCLGE_DEFAULT_TX_BUF 0x4000 /* 16k bytes */ #define HCLGE_TOTAL_PKT_BUF 0x108000 /* 1.03125M bytes */ #define HCLGE_DEFAULT_DV 0xA000 /* 40k byte */ +#define HCLGE_DEFAULT_NON_DCB_DV 0x7800 /* 30K byte */ #define HCLGE_TYPE_CRQ 0 #define HCLGE_TYPE_CSQ 1 diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 903f43a8c2a1..796370adf99c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -1444,7 +1444,11 @@ static bool hclge_is_rx_buf_ok(struct hclge_dev *hdev, u32 rx_all) tc_num = hclge_get_tc_num(hdev); pfc_enable_num = hclge_get_pfc_enalbe_num(hdev); - shared_buf_min = 2 * hdev->mps + HCLGE_DEFAULT_DV; + if (hnae3_dev_dcb_supported(hdev)) + shared_buf_min = 2 * hdev->mps + HCLGE_DEFAULT_DV; + else + shared_buf_min = 2 * hdev->mps + HCLGE_DEFAULT_NON_DCB_DV; + shared_buf_tc = pfc_enable_num * hdev->mps + (tc_num - pfc_enable_num) * hdev->mps / 2 + hdev->mps; From bb1fe9ea6371e075d3d1448cd3ff6441d31307be Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Wed, 20 Sep 2017 18:52:53 +0800 Subject: [PATCH 178/279] net: hns3: Fix for not setting rx private buffer size to zero When rx private buffer is disabled, there may be some case that the rx private buffer is not set to zero, which may cause buffer allocation process to fail. This patch fixes this problem by setting priv->enable to 0 and priv->buf_size to zero when rx private buffer is disabled. Fixes: 46a3df9f9718 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support") Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 796370adf99c..a7d8fb1e15f6 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -1504,6 +1504,11 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev, u32 tx_size) priv->wl.high = 2 * hdev->mps; priv->buf_size = priv->wl.high; } + } else { + priv->enable = 0; + priv->wl.low = 0; + priv->wl.high = 0; + priv->buf_size = 0; } } @@ -1516,8 +1521,15 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev, u32 tx_size) for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { priv = &hdev->priv_buf[i]; - if (hdev->hw_tc_map & BIT(i)) - priv->enable = 1; + priv->enable = 0; + priv->wl.low = 0; + priv->wl.high = 0; + priv->buf_size = 0; + + if (!(hdev->hw_tc_map & BIT(i))) + continue; + + priv->enable = 1; if (hdev->tm_info.hw_pfc_map & BIT(i)) { priv->wl.low = 128; From b8c8bf47da5576657370798da6f18a8cb0245d5b Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Wed, 20 Sep 2017 18:52:54 +0800 Subject: [PATCH 179/279] net: hns3: Fix for rx_priv_buf_alloc not setting rx shared buffer rx_priv_buf_alloc is used to tell hardware how much buffer is used for rx direction, right now only the private buffer is assigned. For ae_dev that doesn't support DCB, private rx buffer is assigned to zero, only shared rx buffer is used. So not setting the shared rx buffer cause dropping of packet in SSU. Fixes: 46a3df9f9718 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support") Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h | 3 ++- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index 30e2ad5ac0da..758cf3948131 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -270,7 +270,8 @@ struct hclge_tx_buff_alloc { struct hclge_rx_priv_buff { __le16 buf_num[HCLGE_TC_NUM]; - u8 rsv[8]; + __le16 shared_buf; + u8 rsv[6]; }; struct hclge_query_version { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index a7d8fb1e15f6..e313552bb23d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -1622,6 +1622,10 @@ static int hclge_rx_priv_buf_alloc(struct hclge_dev *hdev) cpu_to_le16(true << HCLGE_TC0_PRI_BUF_EN_B); } + req->shared_buf = + cpu_to_le16((hdev->s_buf.buf_size >> HCLGE_BUF_UNIT_S) | + (1 << HCLGE_TC0_PRI_BUF_EN_B)); + ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) { dev_err(&hdev->pdev->dev, From d602a52540c9b92e0dd152cfe1d0848c23f08894 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Wed, 20 Sep 2017 18:52:55 +0800 Subject: [PATCH 180/279] net: hns3: Fix for rx priv buf allocation when DCB is not supported When hdev doesn't support DCB, rx private buffer is not allocated, otherwise there is not enough buffer for rx shared buffer, causing buffer allocation process to fail. This patch fixes by checking the dcb capability in hclge_rx_buffer_calc. Fixes: 46a3df9f9718 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support") Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index e313552bb23d..c660f0caf709 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -1489,6 +1489,16 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev, u32 tx_size) struct hclge_priv_buf *priv; int i; + /* When DCB is not supported, rx private + * buffer is not allocated. + */ + if (!hnae3_dev_dcb_supported(hdev)) { + if (!hclge_is_rx_buf_ok(hdev, rx_all)) + return -ENOMEM; + + return 0; + } + /* step 1, try to alloc private buffer for all enabled tc */ for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { priv = &hdev->priv_buf[i]; From c4726338d928c824f56c27734d837b8244132705 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Wed, 20 Sep 2017 18:52:56 +0800 Subject: [PATCH 181/279] net: hns3: Fix typo error for feild in hclge_tm This patch fixes a typo error for feild, which should be field. Fixes: 848440544b41f ("net: hns3: Add support of TX Scheduler & Shaper to HNS3 driver") Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- .../ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 20 +++++++++---------- .../ethernet/hisilicon/hns3/hns3pf/hclge_tm.h | 4 ++-- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index c91dbf19c4b1..fe659f752237 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -280,11 +280,11 @@ static int hclge_tm_pg_shapping_cfg(struct hclge_dev *hdev, shap_cfg_cmd->pg_id = pg_id; - hclge_tm_set_feild(shap_cfg_cmd->pg_shapping_para, IR_B, ir_b); - hclge_tm_set_feild(shap_cfg_cmd->pg_shapping_para, IR_U, ir_u); - hclge_tm_set_feild(shap_cfg_cmd->pg_shapping_para, IR_S, ir_s); - hclge_tm_set_feild(shap_cfg_cmd->pg_shapping_para, BS_B, bs_b); - hclge_tm_set_feild(shap_cfg_cmd->pg_shapping_para, BS_S, bs_s); + hclge_tm_set_field(shap_cfg_cmd->pg_shapping_para, IR_B, ir_b); + hclge_tm_set_field(shap_cfg_cmd->pg_shapping_para, IR_U, ir_u); + hclge_tm_set_field(shap_cfg_cmd->pg_shapping_para, IR_S, ir_s); + hclge_tm_set_field(shap_cfg_cmd->pg_shapping_para, BS_B, bs_b); + hclge_tm_set_field(shap_cfg_cmd->pg_shapping_para, BS_S, bs_s); return hclge_cmd_send(&hdev->hw, &desc, 1); } @@ -307,11 +307,11 @@ static int hclge_tm_pri_shapping_cfg(struct hclge_dev *hdev, shap_cfg_cmd->pri_id = pri_id; - hclge_tm_set_feild(shap_cfg_cmd->pri_shapping_para, IR_B, ir_b); - hclge_tm_set_feild(shap_cfg_cmd->pri_shapping_para, IR_U, ir_u); - hclge_tm_set_feild(shap_cfg_cmd->pri_shapping_para, IR_S, ir_s); - hclge_tm_set_feild(shap_cfg_cmd->pri_shapping_para, BS_B, bs_b); - hclge_tm_set_feild(shap_cfg_cmd->pri_shapping_para, BS_S, bs_s); + hclge_tm_set_field(shap_cfg_cmd->pri_shapping_para, IR_B, ir_b); + hclge_tm_set_field(shap_cfg_cmd->pri_shapping_para, IR_U, ir_u); + hclge_tm_set_field(shap_cfg_cmd->pri_shapping_para, IR_S, ir_s); + hclge_tm_set_field(shap_cfg_cmd->pri_shapping_para, BS_B, bs_b); + hclge_tm_set_field(shap_cfg_cmd->pri_shapping_para, BS_S, bs_s); return hclge_cmd_send(&hdev->hw, &desc, 1); } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h index 7e67337dfaf2..85158b0d73fe 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h @@ -94,10 +94,10 @@ struct hclge_bp_to_qs_map_cmd { u32 rsvd1; }; -#define hclge_tm_set_feild(dest, string, val) \ +#define hclge_tm_set_field(dest, string, val) \ hnae_set_field((dest), (HCLGE_TM_SHAP_##string##_MSK), \ (HCLGE_TM_SHAP_##string##_LSH), val) -#define hclge_tm_get_feild(src, string) \ +#define hclge_tm_get_field(src, string) \ hnae_get_field((src), (HCLGE_TM_SHAP_##string##_MSK), \ (HCLGE_TM_SHAP_##string##_LSH)) From 68ece54efd417d415462adbaa2700cba50de3ff6 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Wed, 20 Sep 2017 18:52:57 +0800 Subject: [PATCH 182/279] net: hns3: Fix for setting rss_size incorrectly rss_size is 1, 2, 4, 8, 16, 32, 64, 128, but acutal tc queue size can be any u16 less than 128. If tc queue size is 5, we set the rss_size to 8, indirection table will be used to limit the size of actual queue size. It may cause dropping of receiving packet in hardware if rss_size is not set correctly. For now, each TC has the same rss size. Fixes: 46a3df9f9718 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support") Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- .../hisilicon/hns3/hns3pf/hclge_main.c | 80 +++++++++---------- .../hisilicon/hns3/hns3pf/hclge_main.h | 1 + .../ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 1 + 3 files changed, 40 insertions(+), 42 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index c660f0caf709..e0685e630afe 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -2606,6 +2606,7 @@ static int hclge_rss_init_hw(struct hclge_dev *hdev) u16 tc_valid[HCLGE_MAX_TC_NUM]; u16 tc_size[HCLGE_MAX_TC_NUM]; u32 *rss_indir = NULL; + u16 rss_size = 0, roundup_size; const u8 *key; int i, ret, j; @@ -2620,7 +2621,13 @@ static int hclge_rss_init_hw(struct hclge_dev *hdev) for (j = 0; j < hdev->num_vmdq_vport + 1; j++) { for (i = 0; i < HCLGE_RSS_IND_TBL_SIZE; i++) { vport[j].rss_indirection_tbl[i] = - i % hdev->rss_size_max; + i % vport[j].alloc_rss_size; + + /* vport 0 is for PF */ + if (j != 0) + continue; + + rss_size = vport[j].alloc_rss_size; rss_indir[i] = vport[j].rss_indirection_tbl[i]; } } @@ -2637,42 +2644,31 @@ static int hclge_rss_init_hw(struct hclge_dev *hdev) if (ret) goto err; - for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { - if (hdev->hw_tc_map & BIT(i)) - tc_valid[i] = 1; - else - tc_valid[i] = 0; - - switch (hdev->rss_size_max) { - case HCLGE_RSS_TC_SIZE_0: - tc_size[i] = 0; - break; - case HCLGE_RSS_TC_SIZE_1: - tc_size[i] = 1; - break; - case HCLGE_RSS_TC_SIZE_2: - tc_size[i] = 2; - break; - case HCLGE_RSS_TC_SIZE_3: - tc_size[i] = 3; - break; - case HCLGE_RSS_TC_SIZE_4: - tc_size[i] = 4; - break; - case HCLGE_RSS_TC_SIZE_5: - tc_size[i] = 5; - break; - case HCLGE_RSS_TC_SIZE_6: - tc_size[i] = 6; - break; - case HCLGE_RSS_TC_SIZE_7: - tc_size[i] = 7; - break; - default: - break; - } - tc_offset[i] = hdev->rss_size_max * i; + /* Each TC have the same queue size, and tc_size set to hardware is + * the log2 of roundup power of two of rss_size, the acutal queue + * size is limited by indirection table. + */ + if (rss_size > HCLGE_RSS_TC_SIZE_7 || rss_size == 0) { + dev_err(&hdev->pdev->dev, + "Configure rss tc size failed, invalid TC_SIZE = %d\n", + rss_size); + return -EINVAL; } + + roundup_size = roundup_pow_of_two(rss_size); + roundup_size = ilog2(roundup_size); + + for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { + tc_valid[i] = 0; + + if (!(hdev->hw_tc_map & BIT(i))) + continue; + + tc_valid[i] = 1; + tc_size[i] = roundup_size; + tc_offset[i] = rss_size * i; + } + ret = hclge_set_rss_tc_mode(hdev, tc_valid, tc_size, tc_offset); err: @@ -4167,12 +4163,6 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) return ret; } - ret = hclge_rss_init_hw(hdev); - if (ret) { - dev_err(&pdev->dev, "Rss init fail, ret =%d\n", ret); - return ret; - } - ret = hclge_init_vlan_config(hdev); if (ret) { dev_err(&pdev->dev, "VLAN init fail, ret =%d\n", ret); @@ -4185,6 +4175,12 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) return ret; } + ret = hclge_rss_init_hw(hdev); + if (ret) { + dev_err(&pdev->dev, "Rss init fail, ret =%d\n", ret); + return ret; + } + setup_timer(&hdev->service_timer, hclge_service_timer, (unsigned long)hdev); INIT_WORK(&hdev->service_task, hclge_service_task); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index edb10ad075eb..7f8dd129c10d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -477,6 +477,7 @@ struct hclge_vport { u8 rss_hash_key[HCLGE_RSS_KEY_SIZE]; /* User configured hash keys */ /* User configured lookup table entries */ u8 rss_indirection_tbl[HCLGE_RSS_IND_TBL_SIZE]; + u16 alloc_rss_size; u16 qs_offset; u16 bw_limit; /* VSI BW Limit (0 = disabled) */ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index fe659f752237..b7ba7aa66620 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -397,6 +397,7 @@ static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport) kinfo->num_tqps / kinfo->num_tc); vport->qs_offset = hdev->tm_info.num_tc * vport->vport_id; vport->dwrr = 100; /* 100 percent as init */ + vport->alloc_rss_size = kinfo->rss_size; for (i = 0; i < kinfo->num_tc; i++) { if (hdev->hw_tc_map & BIT(i)) { From c5795c5308af81568d1573598716091120c85a38 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Wed, 20 Sep 2017 18:52:58 +0800 Subject: [PATCH 183/279] net: hns3: Fix for pri to tc mapping in TM Current mapping between pri and tc is one to one, so user can't map multi priorities to the same tc. This patch changes the mapping to many to one. Fixes: 848440544b41f ("net: hns3: Add support of TX Scheduler & Shaper to HNS3 driver") Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 3 ++- .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 2 +- .../ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 16 +++++++++------- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index ad685f5aa6d1..1a01cadfe5f3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -376,12 +376,12 @@ struct hnae3_ae_algo { struct hnae3_tc_info { u16 tqp_offset; /* TQP offset from base TQP */ u16 tqp_count; /* Total TQPs */ - u8 up; /* user priority */ u8 tc; /* TC index */ bool enable; /* If this TC is enable or not */ }; #define HNAE3_MAX_TC 8 +#define HNAE3_MAX_USER_PRIO 8 struct hnae3_knic_private_info { struct net_device *netdev; /* Set by KNIC client when init instance */ u16 rss_size; /* Allocated RSS queues */ @@ -389,6 +389,7 @@ struct hnae3_knic_private_info { u16 num_desc; u8 num_tc; /* Total number of enabled TCs */ + u8 prio_tc[HNAE3_MAX_USER_PRIO]; /* TC indexed by prio */ struct hnae3_tc_info tc_info[HNAE3_MAX_TC]; /* Idx of array is HW TC */ u16 num_tqps; /* total number of TQPs in this handle */ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 7f8dd129c10d..9fcfd9395424 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -176,7 +176,6 @@ struct hclge_pg_info { struct hclge_tc_info { u8 tc_id; u8 tc_sch_mode; /* 0: sp; 1: dwrr */ - u8 up; u8 pgid; u32 bw_limit; }; @@ -197,6 +196,7 @@ struct hclge_tm_info { u8 num_tc; u8 num_pg; /* It must be 1 if vNET-Base schd */ u8 pg_dwrr[HCLGE_PG_NUM]; + u8 prio_tc[HNAE3_MAX_USER_PRIO]; struct hclge_pg_info pg_info[HCLGE_PG_NUM]; struct hclge_tc_info tc_info[HNAE3_MAX_TC]; enum hclge_fc_mode fc_mode; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index b7ba7aa66620..73a75d7cc551 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -128,9 +128,7 @@ static int hclge_fill_pri_array(struct hclge_dev *hdev, u8 *pri, u8 pri_id) { u8 tc; - for (tc = 0; tc < hdev->tm_info.num_tc; tc++) - if (hdev->tm_info.tc_info[tc].up == pri_id) - break; + tc = hdev->tm_info.prio_tc[pri_id]; if (tc >= hdev->tm_info.num_tc) return -EINVAL; @@ -158,7 +156,7 @@ static int hclge_up_to_tc_map(struct hclge_dev *hdev) hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_PRI_TO_TC_MAPPING, false); - for (pri_id = 0; pri_id < hdev->tm_info.num_tc; pri_id++) { + for (pri_id = 0; pri_id < HNAE3_MAX_USER_PRIO; pri_id++) { ret = hclge_fill_pri_array(hdev, pri, pri_id); if (ret) return ret; @@ -405,16 +403,17 @@ static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport) kinfo->tc_info[i].tqp_offset = i * kinfo->rss_size; kinfo->tc_info[i].tqp_count = kinfo->rss_size; kinfo->tc_info[i].tc = i; - kinfo->tc_info[i].up = hdev->tm_info.tc_info[i].up; } else { /* Set to default queue if TC is disable */ kinfo->tc_info[i].enable = false; kinfo->tc_info[i].tqp_offset = 0; kinfo->tc_info[i].tqp_count = 1; kinfo->tc_info[i].tc = 0; - kinfo->tc_info[i].up = 0; } } + + memcpy(kinfo->prio_tc, hdev->tm_info.prio_tc, + FIELD_SIZEOF(struct hnae3_knic_private_info, prio_tc)); } static void hclge_tm_vport_info_update(struct hclge_dev *hdev) @@ -436,12 +435,15 @@ static void hclge_tm_tc_info_init(struct hclge_dev *hdev) for (i = 0; i < hdev->tm_info.num_tc; i++) { hdev->tm_info.tc_info[i].tc_id = i; hdev->tm_info.tc_info[i].tc_sch_mode = HCLGE_SCH_MODE_DWRR; - hdev->tm_info.tc_info[i].up = i; hdev->tm_info.tc_info[i].pgid = 0; hdev->tm_info.tc_info[i].bw_limit = hdev->tm_info.pg_info[0].bw_limit; } + for (i = 0; i < HNAE3_MAX_USER_PRIO; i++) + hdev->tm_info.prio_tc[i] = + (i >= hdev->tm_info.num_tc) ? 0 : i; + hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE; } From 4d61eda812041ef9c820d7f147884133fd3307bc Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 19 Sep 2017 16:27:39 +0100 Subject: [PATCH 184/279] CIFS: make arrays static const, reduces object code size Don't populate the read-only arrays types[] on the stack, instead make them both static const. Makes the object code smaller by over 200 bytes: Before: text data bss dec hex filename 111503 37696 448 149647 2488f fs/cifs/file.o After: text data bss dec hex filename 111140 37856 448 149444 247c4 fs/cifs/file.o Signed-off-by: Colin Ian King Signed-off-by: Steve French Reviewed-by: Ronnie Sahlberg --- fs/cifs/file.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 0786f19d288f..822311919163 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1102,8 +1102,10 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) struct cifs_tcon *tcon; unsigned int num, max_num, max_buf; LOCKING_ANDX_RANGE *buf, *cur; - int types[] = {LOCKING_ANDX_LARGE_FILES, - LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES}; + static const int types[] = { + LOCKING_ANDX_LARGE_FILES, + LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES + }; int i; xid = get_xid(); @@ -1434,8 +1436,10 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, unsigned int xid) { int rc = 0, stored_rc; - int types[] = {LOCKING_ANDX_LARGE_FILES, - LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES}; + static const int types[] = { + LOCKING_ANDX_LARGE_FILES, + LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES + }; unsigned int i; unsigned int max_num, num, max_buf; LOCKING_ANDX_RANGE *buf, *cur; From 94183331e815617246b1baa97e0916f358c794bb Mon Sep 17 00:00:00 2001 From: Shu Wang Date: Thu, 7 Sep 2017 16:03:27 +0800 Subject: [PATCH 185/279] cifs: release cifs root_cred after exit_cifs memory leak was found by kmemleak. exit_cifs_spnego should be called before cifs module removed, or cifs root_cred will not be released. kmemleak report: unreferenced object 0xffff880070a3ce40 (size 192): backtrace: kmemleak_alloc+0x4a/0xa0 kmem_cache_alloc+0xc7/0x1d0 prepare_kernel_cred+0x20/0x120 init_cifs_spnego+0x2d/0x170 [cifs] 0xffffffffc07801f3 do_one_initcall+0x51/0x1b0 do_init_module+0x60/0x1fd load_module+0x161e/0x1b60 SYSC_finit_module+0xa9/0x100 SyS_finit_module+0xe/0x10 Signed-off-by: Shu Wang Signed-off-by: Steve French Reviewed-by: Ronnie Sahlberg CC: Stable --- fs/cifs/cifsfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index a864e6575309..8c8b75d33f31 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1449,7 +1449,7 @@ exit_cifs(void) exit_cifs_idmap(); #endif #ifdef CONFIG_CIFS_UPCALL - unregister_key_type(&cifs_spnego_key_type); + exit_cifs_spnego(); #endif cifs_destroy_request_bufs(); cifs_destroy_mids(); From f5c4ba816315d3b813af16f5571f86c8d4e897bd Mon Sep 17 00:00:00 2001 From: Shu Wang Date: Fri, 8 Sep 2017 18:48:33 +0800 Subject: [PATCH 186/279] cifs: release auth_key.response for reconnect. There is a race that cause cifs reconnect in cifs_mount, - cifs_mount - cifs_get_tcp_session - [ start thread cifs_demultiplex_thread - cifs_read_from_socket: -ECONNABORTED - DELAY_WORK smb2_reconnect_server ] - cifs_setup_session - [ smb2_reconnect_server ] auth_key.response was allocated in cifs_setup_session, and will release when the session destoried. So when session re- connect, auth_key.response should be check and released. Tested with my system: CIFS VFS: Free previous auth_key.response = ffff8800320bbf80 A simple auth_key.response allocation call trace: - cifs_setup_session - SMB2_sess_setup - SMB2_sess_auth_rawntlmssp_authenticate - build_ntlmssp_auth_blob - setup_ntlmv2_rsp Signed-off-by: Shu Wang Signed-off-by: Steve French CC: Stable Reviewed-by: Ronnie Sahlberg --- fs/cifs/connect.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 8d38b22afb2b..0bfc2280436d 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -4154,6 +4154,14 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, cifs_dbg(FYI, "Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d\n", server->sec_mode, server->capabilities, server->timeAdj); + if (ses->auth_key.response) { + cifs_dbg(VFS, "Free previous auth_key.response = %p\n", + ses->auth_key.response); + kfree(ses->auth_key.response); + ses->auth_key.response = NULL; + ses->auth_key.len = 0; + } + if (server->ops->sess_setup) rc = server->ops->sess_setup(xid, ses, nls_info); From 0603c96f3af50e2f9299fa410c224ab1d465e0f9 Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 20 Sep 2017 19:57:18 -0500 Subject: [PATCH 187/279] SMB: Validate negotiate (to protect against downgrade) even if signing off As long as signing is supported (ie not a guest user connection) and connection is SMB3 or SMB3.02, then validate negotiate (protect against man in the middle downgrade attacks). We had been doing this only when signing was required, not when signing was just enabled, but this more closely matches recommended SMB3 behavior and is better security. Suggested by Metze. Signed-off-by: Steve French Reviewed-by: Jeremy Allison Acked-by: Stefan Metzmacher Reviewed-by: Ronnie Sahlberg CC: Stable --- fs/cifs/smb2pdu.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index d499ce265c3b..6f0e6343c15e 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -656,15 +656,22 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) /* * validation ioctl must be signed, so no point sending this if we - * can not sign it. We could eventually change this to selectively + * can not sign it (ie are not known user). Even if signing is not + * required (enabled but not negotiated), in those cases we selectively * sign just this, the first and only signed request on a connection. - * This is good enough for now since a user who wants better security - * would also enable signing on the mount. Having validation of - * negotiate info for signed connections helps reduce attack vectors + * Having validation of negotiate info helps reduce attack vectors. */ - if (tcon->ses->server->sign == false) + if (tcon->ses->session_flags & SMB2_SESSION_FLAG_IS_GUEST) return 0; /* validation requires signing */ + if (tcon->ses->user_name == NULL) { + cifs_dbg(FYI, "Can't validate negotiate: null user mount\n"); + return 0; /* validation requires signing */ + } + + if (tcon->ses->session_flags & SMB2_SESSION_FLAG_IS_NULL) + cifs_dbg(VFS, "Unexpected null user (anonymous) auth flag sent by server\n"); + vneg_inbuf.Capabilities = cpu_to_le32(tcon->ses->server->vals->req_capabilities); memcpy(vneg_inbuf.Guid, tcon->ses->server->client_guid, From a90bcb86ae700c12432446c4aa1819e7b8e172ec Mon Sep 17 00:00:00 2001 From: Petar Penkov Date: Tue, 29 Aug 2017 11:20:32 -0700 Subject: [PATCH 188/279] iov_iter: fix page_copy_sane for compound pages Issue is that if the data crosses a page boundary inside a compound page, this check will incorrectly trigger a WARN_ON. To fix this, compute the order using the head of the compound page and adjust the offset to be relative to that head. Fixes: 72e809ed81ed ("iov_iter: sanity checks for copy to/from page primitives") Signed-off-by: Petar Penkov CC: Al Viro CC: Eric Dumazet Signed-off-by: Al Viro --- lib/iov_iter.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 52c8dd6d8e82..1c1c06ddc20a 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -687,8 +687,10 @@ EXPORT_SYMBOL(_copy_from_iter_full_nocache); static inline bool page_copy_sane(struct page *page, size_t offset, size_t n) { - size_t v = n + offset; - if (likely(n <= v && v <= (PAGE_SIZE << compound_order(page)))) + struct page *head = compound_head(page); + size_t v = n + offset + page_address(page) - page_address(head); + + if (likely(n <= v && v <= (PAGE_SIZE << compound_order(head)))) return true; WARN_ON(1); return false; From 58aff0af757356065f33290d96a9cd46dfbcae88 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 18 Sep 2017 17:47:38 +0100 Subject: [PATCH 189/279] ipc/shm: Fix order of parameters when calling copy_compat_shmid_to_user Commit 553f770ef71b ("ipc: move compat shmctl to native") moved the compat IPC syscall handling into ipc/shm.c and refactored the struct accessors in the process. Unfortunately, the call to copy_compat_shmid_to_user when handling a compat {IPC,SHM}_STAT command gets the arguments the wrong way round, passing a kernel stack address as the user buffer (destination) and the user buffer as the kernel stack address (source). This patch fixes the parameter ordering so the buffers are accessed correctly. Cc: Al Viro Cc: Andrew Morton Signed-off-by: Will Deacon Signed-off-by: Al Viro --- ipc/shm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ipc/shm.c b/ipc/shm.c index 1b3adfe3c60e..1e2b1692ba2c 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -1237,7 +1237,7 @@ COMPAT_SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, void __user *, uptr) err = shmctl_stat(ns, shmid, cmd, &sem64); if (err < 0) return err; - if (copy_compat_shmid_to_user(&sem64, uptr, version)) + if (copy_compat_shmid_to_user(uptr, &sem64, version)) err = -EFAULT; return err; From 3e77adeea3c5393c9b624832f65441e92867f618 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 7 Sep 2017 16:35:40 +1000 Subject: [PATCH 190/279] powerpc/eeh: Create PHB PEs after EEH is initialized Otherwise we end up not yet having computed the right diag data size on powernv where EEH initialization is delayed, thus causing memory corruption later on when calling OPAL. Fixes: 5cb1f8fdddb7 ("powerpc/powernv/pci: Dynamically allocate PHB diag data") Cc: stable@vger.kernel.org # v4.13+ Signed-off-by: Benjamin Herrenschmidt Acked-by: Russell Currey Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh.c | 4 ++++ arch/powerpc/kernel/eeh_dev.c | 18 ------------------ 2 files changed, 4 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 9e816787c0d4..116000b45531 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1019,6 +1019,10 @@ int eeh_init(void) } else if ((ret = eeh_ops->init())) return ret; + /* Initialize PHB PEs */ + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) + eeh_dev_phb_init_dynamic(hose); + /* Initialize EEH event */ ret = eeh_event_init(); if (ret) diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c index ad04ecd63c20..a34e6912c15e 100644 --- a/arch/powerpc/kernel/eeh_dev.c +++ b/arch/powerpc/kernel/eeh_dev.c @@ -78,21 +78,3 @@ void eeh_dev_phb_init_dynamic(struct pci_controller *phb) /* EEH PE for PHB */ eeh_phb_pe_create(phb); } - -/** - * eeh_dev_phb_init - Create EEH devices for devices included in existing PHBs - * - * Scan all the existing PHBs and create EEH devices for their OF - * nodes and their children OF nodes - */ -static int __init eeh_dev_phb_init(void) -{ - struct pci_controller *phb, *tmp; - - list_for_each_entry_safe(phb, tmp, &hose_list, list_node) - eeh_dev_phb_init_dynamic(phb); - - return 0; -} - -core_initcall(eeh_dev_phb_init); From 087ff6a5ae3052bb2835e191094b793789cb8817 Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Wed, 20 Sep 2017 17:02:51 -0400 Subject: [PATCH 191/279] powerpc/pseries: Fix "OF: ERROR: Bad of_node_put() on /cpus" during DLPAR Commit 215ee763f8cb ("powerpc: pseries: remove dlpar_attach_node dependency on full path") reworked dlpar_attach_node() to no longer look up the parent node "/cpus", but instead to have the parent node passed by the caller in the function parameter list. As a result dlpar_attach_node() is no longer responsible for freeing the reference to the parent node. However, commit 215ee763f8cb failed to remove the of_node_put(parent) call in dlpar_attach_node(), or to take into account that the reference to the parent in the caller dlpar_cpu_add() needs to be held until after dlpar_attach_node() returns. As a result doing repeated cpu add/remove dlpar operations will eventually result in the following error: OF: ERROR: Bad of_node_put() on /cpus CPU: 0 PID: 10896 Comm: drmgr Not tainted 4.13.0-autotest #1 Call Trace: dump_stack+0x15c/0x1f8 (unreliable) of_node_release+0x1a4/0x1c0 kobject_put+0x1a8/0x310 kobject_del+0xbc/0xf0 __of_detach_node_sysfs+0x144/0x210 of_detach_node+0xf0/0x180 dlpar_detach_node+0xc4/0x120 dlpar_cpu_remove+0x280/0x560 dlpar_cpu_release+0xbc/0x1b0 arch_cpu_release+0x6c/0xb0 cpu_release_store+0xa0/0x100 dev_attr_store+0x68/0xa0 sysfs_kf_write+0xa8/0xf0 kernfs_fop_write+0x2cc/0x400 __vfs_write+0x5c/0x340 vfs_write+0x1a8/0x3d0 SyS_write+0xa8/0x1a0 system_call+0x58/0x6c Fix the issue by removing the of_node_put(parent) call from dlpar_attach_node(), and ensuring that the reference to the parent node is properly held and released by the caller dlpar_cpu_add(). Fixes: 215ee763f8cb ("powerpc: pseries: remove dlpar_attach_node dependency on full path") Signed-off-by: Tyrel Datwyler Reported-by: Abdul Haleem [mpe: Add a comment in the code and frob the change log slightly] Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/dlpar.c | 1 - arch/powerpc/platforms/pseries/hotplug-cpu.c | 6 +++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 783f36364690..e45b5f10645a 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -266,7 +266,6 @@ int dlpar_attach_node(struct device_node *dn, struct device_node *parent) return rc; } - of_node_put(dn->parent); return 0; } diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index fc0d8f97c03a..fadb95efbb9e 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -462,15 +462,19 @@ static ssize_t dlpar_cpu_add(u32 drc_index) } dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent); - of_node_put(parent); if (!dn) { pr_warn("Failed call to configure-connector, drc index: %x\n", drc_index); dlpar_release_drc(drc_index); + of_node_put(parent); return -EINVAL; } rc = dlpar_attach_node(dn, parent); + + /* Regardless we are done with parent now */ + of_node_put(parent); + if (rc) { saved_rc = rc; pr_warn("Failed to attach node %s, rc: %d, drc index: %x\n", From b537ca6fede69a281dc524983e5e633d79a10a08 Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Wed, 20 Sep 2017 17:02:52 -0400 Subject: [PATCH 192/279] powerpc/pseries: Fix parent_dn reference leak in add_dt_node() A reference to the parent device node is held by add_dt_node() for the node to be added. If the call to dlpar_configure_connector() fails add_dt_node() returns ENOENT and that reference is not freed. Add a call to of_node_put(parent_dn) prior to bailing out after a failed dlpar_configure_connector() call. Fixes: 8d5ff320766f ("powerpc/pseries: Make dlpar_configure_connector parent node aware") Cc: stable@vger.kernel.org # v3.12+ Signed-off-by: Tyrel Datwyler Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/mobility.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 210ce632d63e..f7042ad492ba 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -226,8 +226,10 @@ static int add_dt_node(__be32 parent_phandle, __be32 drc_index) return -ENOENT; dn = dlpar_configure_connector(drc_index, parent_dn); - if (!dn) + if (!dn) { + of_node_put(parent_dn); return -ENOENT; + } rc = dlpar_attach_node(dn, parent_dn); if (rc) From 9bbe7dc05c1f80200621a626ec2266987f4b42b3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 18 Sep 2017 17:55:24 +0200 Subject: [PATCH 193/279] MIPS: MSP71xx: Include asm/setup.h msp71xx_defconfig can not be built at the in v4.14-rc1 arch/mips/pmcs-msp71xx/msp_smp.c:72:2: error: implicit declaration of function 'set_vi_handler' [-Werror=implicit-function-declaration] I don't know what caused the regression, but including the right header is the obvious fix. Signed-off-by: Arnd Bergmann Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/17309/ Signed-off-by: Ralf Baechle --- arch/mips/pmcs-msp71xx/msp_smp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/mips/pmcs-msp71xx/msp_smp.c b/arch/mips/pmcs-msp71xx/msp_smp.c index ffa0f7101a97..2b08242ade62 100644 --- a/arch/mips/pmcs-msp71xx/msp_smp.c +++ b/arch/mips/pmcs-msp71xx/msp_smp.c @@ -22,6 +22,8 @@ #include #include +#include + #ifdef CONFIG_MIPS_MT_SMP #define MIPS_CPU_IPI_RESCHED_IRQ 0 /* SW int 0 for resched */ #define MIPS_CPU_IPI_CALL_IRQ 1 /* SW int 1 for call */ From c22c8043105591a8b74142cf837604087cdba40b Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 19 Sep 2017 14:11:22 +0100 Subject: [PATCH 194/279] MIPS: Fix input modify in __write_64bit_c0_split() The inline asm in __write_64bit_c0_split() modifies the 64-bit input operand by shifting the high register left by 32, and constructing the full 64-bit value in the low register (even on a 32-bit kernel), so if that value is used again it could cause breakage as GCC would assume the registers haven't changed when they have. To quote the GCC extended asm documentation: > Warning: Do not modify the contents of input-only operands (except for > inputs tied to outputs). The compiler assumes that on exit from the > asm statement these operands contain the same values as they had > before executing the statement. Avoid modifying the input by using a temporary variable as an output which is modified instead of the input and not otherwise used. The asm is always __volatile__ so GCC shouldn't optimise it out. The low register of the temporary output is written before the high register of the input is read, so we have two constraint alternatives, one where both use the same registers (for when the input value isn't subsequently used), and one with an early clobber on the output in case the low output uses the same register as the high input. This allows the resulting assembly to remain mostly unchanged. A diff of a MIPS32r6 kernel reveals only three differences, two in relation to write_c0_r10k_diag() in cpu_probe() (register allocation rearranged slightly but otherwise identical), and one in relation to write_c0_cvmmemctl2() in kvm_vz_local_flush_guesttlb_all(), but the octeon CPU is only supported on 64-bit kernels where __write_64bit_c0_split() isn't used so that shouldn't matter in practice. So there currently doesn't appear to be anything broken by this bug. Signed-off-by: James Hogan Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/17315/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/mipsregs.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index e4ed1bc9a734..a6810923b3f0 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -1377,29 +1377,32 @@ do { \ #define __write_64bit_c0_split(source, sel, val) \ do { \ + unsigned long long __tmp; \ unsigned long __flags; \ \ local_irq_save(__flags); \ if (sel == 0) \ __asm__ __volatile__( \ ".set\tmips64\n\t" \ - "dsll\t%L0, %L0, 32\n\t" \ + "dsll\t%L0, %L1, 32\n\t" \ "dsrl\t%L0, %L0, 32\n\t" \ - "dsll\t%M0, %M0, 32\n\t" \ + "dsll\t%M0, %M1, 32\n\t" \ "or\t%L0, %L0, %M0\n\t" \ "dmtc0\t%L0, " #source "\n\t" \ ".set\tmips0" \ - : : "r" (val)); \ + : "=&r,r" (__tmp) \ + : "r,0" (val)); \ else \ __asm__ __volatile__( \ ".set\tmips64\n\t" \ - "dsll\t%L0, %L0, 32\n\t" \ + "dsll\t%L0, %L1, 32\n\t" \ "dsrl\t%L0, %L0, 32\n\t" \ - "dsll\t%M0, %M0, 32\n\t" \ + "dsll\t%M0, %M1, 32\n\t" \ "or\t%L0, %L0, %M0\n\t" \ "dmtc0\t%L0, " #source ", " #sel "\n\t" \ ".set\tmips0" \ - : : "r" (val)); \ + : "=&r,r" (__tmp) \ + : "r,0" (val)); \ local_irq_restore(__flags); \ } while (0) From 8eba3651f1dad49c83bb7f8d672301dac4c6add6 Mon Sep 17 00:00:00 2001 From: Manuel Lauss Date: Tue, 12 Sep 2017 20:36:28 +0200 Subject: [PATCH 195/279] MIPS: PCI: fix pcibios_map_irq section mismatch Drop the __init from pcibios_map_irq() to make this section mis- match go away: WARNING: vmlinux.o(.text+0x56acd4): Section mismatch in reference from the function pcibios_scanbus() to the function .init.text:pcibios_map_irq() The function pcibios_scanbus() references the function __init pcibios_map_irq(). This is often because pcibios_scanbus lacks a __init annotation or the annotation of pcibios_map_irq is wrong. Run-Tested only on Alchemy. Signed-off-by: Manuel Lauss Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/17267/ Signed-off-by: Ralf Baechle --- arch/mips/pci/fixup-capcella.c | 2 +- arch/mips/pci/fixup-cobalt.c | 2 +- arch/mips/pci/fixup-emma2rh.c | 2 +- arch/mips/pci/fixup-fuloong2e.c | 2 +- arch/mips/pci/fixup-ip32.c | 2 +- arch/mips/pci/fixup-lantiq.c | 2 +- arch/mips/pci/fixup-lemote2f.c | 2 +- arch/mips/pci/fixup-loongson3.c | 2 +- arch/mips/pci/fixup-malta.c | 2 +- arch/mips/pci/fixup-mpc30x.c | 2 +- arch/mips/pci/fixup-pmcmsp.c | 2 +- arch/mips/pci/fixup-sni.c | 2 +- arch/mips/pci/fixup-tb0219.c | 2 +- arch/mips/pci/fixup-tb0226.c | 2 +- arch/mips/pci/fixup-tb0287.c | 2 +- arch/mips/pci/pci-alchemy.c | 2 +- arch/mips/pci/pci-bcm47xx.c | 2 +- arch/mips/pci/pci-lasat.c | 2 +- arch/mips/pci/pci-mt7620.c | 2 +- arch/mips/pci/pci-octeon.c | 4 ++-- arch/mips/pci/pci-rt2880.c | 2 +- arch/mips/pci/pci-rt3883.c | 2 +- arch/mips/pci/pci-xlp.c | 2 +- arch/mips/pci/pci-xlr.c | 2 +- 24 files changed, 25 insertions(+), 25 deletions(-) diff --git a/arch/mips/pci/fixup-capcella.c b/arch/mips/pci/fixup-capcella.c index 1c02f5737367..35a671595a37 100644 --- a/arch/mips/pci/fixup-capcella.c +++ b/arch/mips/pci/fixup-capcella.c @@ -38,7 +38,7 @@ static char irq_tab_capcella[][5] __initdata = { [14] = { -1, INTA, INTB, INTC, INTD } }; -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { return irq_tab_capcella[slot][pin]; } diff --git a/arch/mips/pci/fixup-cobalt.c b/arch/mips/pci/fixup-cobalt.c index b3ab59318d91..62810d3fe99b 100644 --- a/arch/mips/pci/fixup-cobalt.c +++ b/arch/mips/pci/fixup-cobalt.c @@ -174,7 +174,7 @@ static char irq_tab_raq2[] __initdata = { [COBALT_PCICONF_ETH1] = ETH1_IRQ }; -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { if (cobalt_board_id <= COBALT_BRD_ID_QUBE1) return irq_tab_qube1[slot]; diff --git a/arch/mips/pci/fixup-emma2rh.c b/arch/mips/pci/fixup-emma2rh.c index 19caf775c206..4832ac9f118a 100644 --- a/arch/mips/pci/fixup-emma2rh.c +++ b/arch/mips/pci/fixup-emma2rh.c @@ -85,7 +85,7 @@ static void emma2rh_pci_host_fixup(struct pci_dev *dev) DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_EMMA2RH, emma2rh_pci_host_fixup); -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { return irq_map[slot][pin]; } diff --git a/arch/mips/pci/fixup-fuloong2e.c b/arch/mips/pci/fixup-fuloong2e.c index 50da773faede..b47c2771dc99 100644 --- a/arch/mips/pci/fixup-fuloong2e.c +++ b/arch/mips/pci/fixup-fuloong2e.c @@ -19,7 +19,7 @@ /* South bridge slot number is set by the pci probe process */ static u8 sb_slot = 5; -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { int irq = 0; diff --git a/arch/mips/pci/fixup-ip32.c b/arch/mips/pci/fixup-ip32.c index 133685e215ee..ea29f5450be3 100644 --- a/arch/mips/pci/fixup-ip32.c +++ b/arch/mips/pci/fixup-ip32.c @@ -39,7 +39,7 @@ static char irq_tab_mace[][5] __initdata = { * irqs. I suppose a device without a pin A will thank us for doing it * right if there exists such a broken piece of crap. */ -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { return irq_tab_mace[slot][pin]; } diff --git a/arch/mips/pci/fixup-lantiq.c b/arch/mips/pci/fixup-lantiq.c index 2b5427d3f35c..81530a13b349 100644 --- a/arch/mips/pci/fixup-lantiq.c +++ b/arch/mips/pci/fixup-lantiq.c @@ -23,7 +23,7 @@ int pcibios_plat_dev_init(struct pci_dev *dev) return 0; } -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { return of_irq_parse_and_map_pci(dev, slot, pin); } diff --git a/arch/mips/pci/fixup-lemote2f.c b/arch/mips/pci/fixup-lemote2f.c index 95ab9a1bd010..7e5991e0e323 100644 --- a/arch/mips/pci/fixup-lemote2f.c +++ b/arch/mips/pci/fixup-lemote2f.c @@ -51,7 +51,7 @@ static char irq_tab[][5] __initdata = { {0, 0, 0, 0, 0}, /* 27: Unused */ }; -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { int virq; diff --git a/arch/mips/pci/fixup-loongson3.c b/arch/mips/pci/fixup-loongson3.c index 2b6d5e196f99..8a741c2c6685 100644 --- a/arch/mips/pci/fixup-loongson3.c +++ b/arch/mips/pci/fixup-loongson3.c @@ -32,7 +32,7 @@ static void print_fixup_info(const struct pci_dev *pdev) pdev->vendor, pdev->device, pdev->irq); } -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { print_fixup_info(dev); return dev->irq; diff --git a/arch/mips/pci/fixup-malta.c b/arch/mips/pci/fixup-malta.c index 40e920c653cc..1f5f25e39590 100644 --- a/arch/mips/pci/fixup-malta.c +++ b/arch/mips/pci/fixup-malta.c @@ -38,7 +38,7 @@ static char irq_tab[][5] __initdata = { {0, PCID, PCIA, PCIB, PCIC } /* 21: PCI Slot 4 */ }; -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { int virq; virq = irq_tab[slot][pin]; diff --git a/arch/mips/pci/fixup-mpc30x.c b/arch/mips/pci/fixup-mpc30x.c index 8e4f8288eca2..5da62c76e271 100644 --- a/arch/mips/pci/fixup-mpc30x.c +++ b/arch/mips/pci/fixup-mpc30x.c @@ -34,7 +34,7 @@ static const int irq_tab_mpc30x[] __initconst = { [29] = MQ200_IRQ, }; -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { if (slot == 30) return internal_func_irqs[PCI_FUNC(dev->devfn)]; diff --git a/arch/mips/pci/fixup-pmcmsp.c b/arch/mips/pci/fixup-pmcmsp.c index fab405c21c2f..f2b7b1e4395b 100644 --- a/arch/mips/pci/fixup-pmcmsp.c +++ b/arch/mips/pci/fixup-pmcmsp.c @@ -202,7 +202,7 @@ int pcibios_plat_dev_init(struct pci_dev *dev) * RETURNS: IRQ number * ****************************************************************************/ -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { #if !defined(CONFIG_PMC_MSP7120_GW) && !defined(CONFIG_PMC_MSP7120_EVAL) printk(KERN_WARNING "PCI: unknown board, no PCI IRQs assigned.\n"); diff --git a/arch/mips/pci/fixup-sni.c b/arch/mips/pci/fixup-sni.c index f67ebeeb4200..309e1c562959 100644 --- a/arch/mips/pci/fixup-sni.c +++ b/arch/mips/pci/fixup-sni.c @@ -130,7 +130,7 @@ static inline int is_rm300_revd(void) return (csmsr & 0xa0) == 0x20; } -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { switch (sni_brd_type) { case SNI_BRD_PCI_TOWER_CPLUS: diff --git a/arch/mips/pci/fixup-tb0219.c b/arch/mips/pci/fixup-tb0219.c index d0b0083fbd27..cc581535f257 100644 --- a/arch/mips/pci/fixup-tb0219.c +++ b/arch/mips/pci/fixup-tb0219.c @@ -23,7 +23,7 @@ #include -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { int irq = -1; diff --git a/arch/mips/pci/fixup-tb0226.c b/arch/mips/pci/fixup-tb0226.c index 4196ccf3ea3d..b827b5cad5fd 100644 --- a/arch/mips/pci/fixup-tb0226.c +++ b/arch/mips/pci/fixup-tb0226.c @@ -23,7 +23,7 @@ #include #include -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { int irq = -1; diff --git a/arch/mips/pci/fixup-tb0287.c b/arch/mips/pci/fixup-tb0287.c index 8c5039ed75d7..98f26285f2e3 100644 --- a/arch/mips/pci/fixup-tb0287.c +++ b/arch/mips/pci/fixup-tb0287.c @@ -22,7 +22,7 @@ #include -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { unsigned char bus; int irq = -1; diff --git a/arch/mips/pci/pci-alchemy.c b/arch/mips/pci/pci-alchemy.c index e99ca7702d8a..f15ec98de2de 100644 --- a/arch/mips/pci/pci-alchemy.c +++ b/arch/mips/pci/pci-alchemy.c @@ -522,7 +522,7 @@ static int __init alchemy_pci_init(void) arch_initcall(alchemy_pci_init); -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { struct alchemy_pci_context *ctx = dev->sysdata; if (ctx && ctx->board_map_irq) diff --git a/arch/mips/pci/pci-bcm47xx.c b/arch/mips/pci/pci-bcm47xx.c index 76f16eaed0ad..230d7dd273e2 100644 --- a/arch/mips/pci/pci-bcm47xx.c +++ b/arch/mips/pci/pci-bcm47xx.c @@ -28,7 +28,7 @@ #include #include -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { return 0; } diff --git a/arch/mips/pci/pci-lasat.c b/arch/mips/pci/pci-lasat.c index 40d2797d2bc4..47f4ee6bbb3b 100644 --- a/arch/mips/pci/pci-lasat.c +++ b/arch/mips/pci/pci-lasat.c @@ -61,7 +61,7 @@ arch_initcall(lasat_pci_setup); #define LASAT_IRQ_PCIC (LASAT_IRQ_BASE + 7) #define LASAT_IRQ_PCID (LASAT_IRQ_BASE + 8) -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { switch (slot) { case 1: diff --git a/arch/mips/pci/pci-mt7620.c b/arch/mips/pci/pci-mt7620.c index 4e633c1e7ff3..90fba9bf98da 100644 --- a/arch/mips/pci/pci-mt7620.c +++ b/arch/mips/pci/pci-mt7620.c @@ -361,7 +361,7 @@ static int mt7620_pci_probe(struct platform_device *pdev) return 0; } -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { u16 cmd; u32 val; diff --git a/arch/mips/pci/pci-octeon.c b/arch/mips/pci/pci-octeon.c index 9ee01936862e..771f5de6362d 100644 --- a/arch/mips/pci/pci-octeon.c +++ b/arch/mips/pci/pci-octeon.c @@ -59,7 +59,7 @@ union octeon_pci_address { } s; }; -int __initconst (*octeon_pcibios_map_irq)(const struct pci_dev *dev, +int (*octeon_pcibios_map_irq)(const struct pci_dev *dev, u8 slot, u8 pin); enum octeon_dma_bar_type octeon_dma_bar_type = OCTEON_DMA_BAR_TYPE_INVALID; @@ -74,7 +74,7 @@ enum octeon_dma_bar_type octeon_dma_bar_type = OCTEON_DMA_BAR_TYPE_INVALID; * as it goes through each bridge. * Returns Interrupt number for the device */ -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { if (octeon_pcibios_map_irq) return octeon_pcibios_map_irq(dev, slot, pin); diff --git a/arch/mips/pci/pci-rt2880.c b/arch/mips/pci/pci-rt2880.c index d6360fe73d05..711cdccdf65b 100644 --- a/arch/mips/pci/pci-rt2880.c +++ b/arch/mips/pci/pci-rt2880.c @@ -181,7 +181,7 @@ static inline void rt2880_pci_write_u32(unsigned long reg, u32 val) spin_unlock_irqrestore(&rt2880_pci_lock, flags); } -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { u16 cmd; int irq = -1; diff --git a/arch/mips/pci/pci-rt3883.c b/arch/mips/pci/pci-rt3883.c index 04f8ea953297..958899ffe99c 100644 --- a/arch/mips/pci/pci-rt3883.c +++ b/arch/mips/pci/pci-rt3883.c @@ -564,7 +564,7 @@ err_put_intc_node: return err; } -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { return of_irq_parse_and_map_pci(dev, slot, pin); } diff --git a/arch/mips/pci/pci-xlp.c b/arch/mips/pci/pci-xlp.c index 7babf01600cb..9eff9137f78e 100644 --- a/arch/mips/pci/pci-xlp.c +++ b/arch/mips/pci/pci-xlp.c @@ -205,7 +205,7 @@ int xlp_socdev_to_node(const struct pci_dev *lnkdev) return PCI_SLOT(lnkdev->devfn) / 8; } -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { struct pci_dev *lnkdev; int lnkfunc, node; diff --git a/arch/mips/pci/pci-xlr.c b/arch/mips/pci/pci-xlr.c index 26d2dabef281..2a1c81a129ba 100644 --- a/arch/mips/pci/pci-xlr.c +++ b/arch/mips/pci/pci-xlr.c @@ -315,7 +315,7 @@ static void xls_pcie_ack_b(struct irq_data *d) } } -int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) +int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { return get_irq_vector(dev); } From c8e1812960eeae42e2183154927028511c4bc566 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 20 Sep 2017 15:45:36 +0300 Subject: [PATCH 196/279] net_sched: always reset qdisc backlog in qdisc_reset() SKB stored in qdisc->gso_skb also counted into backlog. Some qdiscs don't reset backlog to zero in ->reset(), for example sfq just dequeue and free all queued skb. Signed-off-by: Konstantin Khlebnikov Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 92237e75dbbc..bf8c81e07c70 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -685,6 +685,7 @@ void qdisc_reset(struct Qdisc *qdisc) qdisc->gso_skb = NULL; } qdisc->q.qlen = 0; + qdisc->qstats.backlog = 0; } EXPORT_SYMBOL(qdisc_reset); From 21f4d5cc25ec0e6e8eb8420dd2c399e6d2fc7d14 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 20 Sep 2017 15:46:11 +0300 Subject: [PATCH 197/279] net_sched/hfsc: fix curve activation in hfsc_change_class() If real-time or fair-share curves are enabled in hfsc_change_class() class isn't inserted into rb-trees yet. Thus init_ed() and init_vf() must be called in place of update_ed() and update_vf(). Remove isn't required because for now curves cannot be disabled. Signed-off-by: Konstantin Khlebnikov Signed-off-by: David S. Miller --- net/sched/sch_hfsc.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index daaf214e5201..3f88b75488b0 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -958,6 +958,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, } if (cl != NULL) { + int old_flags; + if (parentid) { if (cl->cl_parent && cl->cl_parent->cl_common.classid != parentid) @@ -978,6 +980,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, } sch_tree_lock(sch); + old_flags = cl->cl_flags; + if (rsc != NULL) hfsc_change_rsc(cl, rsc, cur_time); if (fsc != NULL) @@ -986,10 +990,21 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, hfsc_change_usc(cl, usc, cur_time); if (cl->qdisc->q.qlen != 0) { - if (cl->cl_flags & HFSC_RSC) - update_ed(cl, qdisc_peek_len(cl->qdisc)); - if (cl->cl_flags & HFSC_FSC) - update_vf(cl, 0, cur_time); + int len = qdisc_peek_len(cl->qdisc); + + if (cl->cl_flags & HFSC_RSC) { + if (old_flags & HFSC_RSC) + update_ed(cl, len); + else + init_ed(cl, len); + } + + if (cl->cl_flags & HFSC_FSC) { + if (old_flags & HFSC_FSC) + update_vf(cl, 0, cur_time); + else + init_vf(cl, len); + } } sch_tree_unlock(sch); From fe2502e49b58606580c77b3d84e42f946de182d8 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 20 Sep 2017 09:18:45 -0700 Subject: [PATCH 198/279] net_sched: remove cls_flower idr on failure Fixes: c15ab236d69d ("net/sched: Change cls_flower to use IDR") Cc: Chris Mi Cc: Jiri Pirko Signed-off-by: Cong Wang Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_flower.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 1a267e77c6de..d230cb4c8094 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -922,28 +922,28 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, if (!tc_flags_valid(fnew->flags)) { err = -EINVAL; - goto errout; + goto errout_idr; } } err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr); if (err) - goto errout; + goto errout_idr; err = fl_check_assign_mask(head, &mask); if (err) - goto errout; + goto errout_idr; if (!tc_skip_sw(fnew->flags)) { if (!fold && fl_lookup(head, &fnew->mkey)) { err = -EEXIST; - goto errout; + goto errout_idr; } err = rhashtable_insert_fast(&head->ht, &fnew->ht_node, head->ht_params); if (err) - goto errout; + goto errout_idr; } if (!tc_skip_hw(fnew->flags)) { @@ -952,7 +952,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, &mask.key, fnew); if (err) - goto errout; + goto errout_idr; } if (!tc_in_hw(fnew->flags)) @@ -981,6 +981,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, kfree(tb); return 0; +errout_idr: + if (fnew->handle) + idr_remove_ext(&head->handle_idr, fnew->handle); errout: tcf_exts_destroy(&fnew->exts); kfree(fnew); From 0ab09befdbb7ca9b969d6206108629ddff43876e Mon Sep 17 00:00:00 2001 From: Alex Ng Date: Wed, 20 Sep 2017 11:17:35 -0700 Subject: [PATCH 199/279] hv_netvsc: fix send buffer failure on MTU change MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If MTU is changed the host would reject the send buffer change. This problem is result of recent change to allow changing send buffer size. Every time we change the MTU, we store the previous net_device section count before destroying the buffer, but we don’t store the previous section size. When we reinitialize the buffer, its size is calculated by multiplying the previous count and previous size. Since we continuously increase the MTU, the host returns us a decreasing count value while the section size is reinitialized to 1728 bytes every time. This eventually leads to a condition where the calculated buf_size is so small that the host rejects it. Fixes: 8b5327975ae1 ("netvsc: allow controlling send/recv buffer size") Signed-off-by: Alex Ng Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 2 ++ drivers/net/hyperv/netvsc.c | 7 ++----- drivers/net/hyperv/netvsc_drv.c | 8 ++++++++ 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index d98cdfb1536b..5176be76ca7d 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -150,6 +150,8 @@ struct netvsc_device_info { u32 num_chn; u32 send_sections; u32 recv_sections; + u32 send_section_size; + u32 recv_section_size; }; enum rndis_device_state { diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index a5511b7326af..8d5077fb0492 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -76,9 +76,6 @@ static struct netvsc_device *alloc_net_device(void) net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT; net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT; - net_device->recv_section_size = NETVSC_RECV_SECTION_SIZE; - net_device->send_section_size = NETVSC_SEND_SECTION_SIZE; - init_completion(&net_device->channel_init_wait); init_waitqueue_head(&net_device->subchan_open); INIT_WORK(&net_device->subchan_work, rndis_set_subchannel); @@ -262,7 +259,7 @@ static int netvsc_init_buf(struct hv_device *device, int ret = 0; /* Get receive buffer area. */ - buf_size = device_info->recv_sections * net_device->recv_section_size; + buf_size = device_info->recv_sections * device_info->recv_section_size; buf_size = roundup(buf_size, PAGE_SIZE); net_device->recv_buf = vzalloc(buf_size); @@ -344,7 +341,7 @@ static int netvsc_init_buf(struct hv_device *device, goto cleanup; /* Now setup the send buffer. */ - buf_size = device_info->send_sections * net_device->send_section_size; + buf_size = device_info->send_sections * device_info->send_section_size; buf_size = round_up(buf_size, PAGE_SIZE); net_device->send_buf = vzalloc(buf_size); diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index d4902ee5f260..a32ae02e1b6c 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -848,7 +848,9 @@ static int netvsc_set_channels(struct net_device *net, device_info.num_chn = count; device_info.ring_size = ring_size; device_info.send_sections = nvdev->send_section_cnt; + device_info.send_section_size = nvdev->send_section_size; device_info.recv_sections = nvdev->recv_section_cnt; + device_info.recv_section_size = nvdev->recv_section_size; rndis_filter_device_remove(dev, nvdev); @@ -963,7 +965,9 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) device_info.ring_size = ring_size; device_info.num_chn = nvdev->num_chn; device_info.send_sections = nvdev->send_section_cnt; + device_info.send_section_size = nvdev->send_section_size; device_info.recv_sections = nvdev->recv_section_cnt; + device_info.recv_section_size = nvdev->recv_section_size; rndis_filter_device_remove(hdev, nvdev); @@ -1485,7 +1489,9 @@ static int netvsc_set_ringparam(struct net_device *ndev, device_info.num_chn = nvdev->num_chn; device_info.ring_size = ring_size; device_info.send_sections = new_tx; + device_info.send_section_size = nvdev->send_section_size; device_info.recv_sections = new_rx; + device_info.recv_section_size = nvdev->recv_section_size; netif_device_detach(ndev); was_opened = rndis_filter_opened(nvdev); @@ -1934,7 +1940,9 @@ static int netvsc_probe(struct hv_device *dev, device_info.ring_size = ring_size; device_info.num_chn = VRSS_CHANNEL_DEFAULT; device_info.send_sections = NETVSC_DEFAULT_TX; + device_info.send_section_size = NETVSC_SEND_SECTION_SIZE; device_info.recv_sections = NETVSC_DEFAULT_RX; + device_info.recv_section_size = NETVSC_RECV_SECTION_SIZE; nvdev = rndis_filter_device_add(dev, &device_info); if (IS_ERR(nvdev)) { From 4a7a3860caac1a8779e8c459d8abe21b111798d6 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Wed, 20 Sep 2017 15:32:53 -0500 Subject: [PATCH 200/279] net: qcom/emac: add software control for pause frame mode The EMAC has the option of sending only a single pause frame when flow control is enabled and the RX queue is full. Although sending only one pause frame has little value, this would allow admins to enable automatic flow control without having to worry about the EMAC flooding nearby switches with pause frames if the kernel hangs. The option is enabled by using the single-pause-mode private flag. Signed-off-by: Timur Tabi Signed-off-by: David S. Miller --- .../net/ethernet/qualcomm/emac/emac-ethtool.c | 30 +++++++++++++++++++ drivers/net/ethernet/qualcomm/emac/emac-mac.c | 22 ++++++++++++++ drivers/net/ethernet/qualcomm/emac/emac.c | 3 ++ drivers/net/ethernet/qualcomm/emac/emac.h | 3 ++ 4 files changed, 58 insertions(+) diff --git a/drivers/net/ethernet/qualcomm/emac/emac-ethtool.c b/drivers/net/ethernet/qualcomm/emac/emac-ethtool.c index bbe24639aa5a..c8c6231b87f3 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-ethtool.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-ethtool.c @@ -88,6 +88,8 @@ static void emac_set_msglevel(struct net_device *netdev, u32 data) static int emac_get_sset_count(struct net_device *netdev, int sset) { switch (sset) { + case ETH_SS_PRIV_FLAGS: + return 1; case ETH_SS_STATS: return EMAC_STATS_LEN; default: @@ -100,6 +102,10 @@ static void emac_get_strings(struct net_device *netdev, u32 stringset, u8 *data) unsigned int i; switch (stringset) { + case ETH_SS_PRIV_FLAGS: + strcpy(data, "single-pause-mode"); + break; + case ETH_SS_STATS: for (i = 0; i < EMAC_STATS_LEN; i++) { strlcpy(data, emac_ethtool_stat_strings[i], @@ -230,6 +236,27 @@ static int emac_get_regs_len(struct net_device *netdev) return EMAC_MAX_REG_SIZE * sizeof(u32); } +#define EMAC_PRIV_ENABLE_SINGLE_PAUSE BIT(0) + +static int emac_set_priv_flags(struct net_device *netdev, u32 flags) +{ + struct emac_adapter *adpt = netdev_priv(netdev); + + adpt->single_pause_mode = !!(flags & EMAC_PRIV_ENABLE_SINGLE_PAUSE); + + if (netif_running(netdev)) + return emac_reinit_locked(adpt); + + return 0; +} + +static u32 emac_get_priv_flags(struct net_device *netdev) +{ + struct emac_adapter *adpt = netdev_priv(netdev); + + return adpt->single_pause_mode ? EMAC_PRIV_ENABLE_SINGLE_PAUSE : 0; +} + static const struct ethtool_ops emac_ethtool_ops = { .get_link_ksettings = phy_ethtool_get_link_ksettings, .set_link_ksettings = phy_ethtool_set_link_ksettings, @@ -253,6 +280,9 @@ static const struct ethtool_ops emac_ethtool_ops = { .get_regs_len = emac_get_regs_len, .get_regs = emac_get_regs, + + .set_priv_flags = emac_set_priv_flags, + .get_priv_flags = emac_get_priv_flags, }; void emac_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c index bcd4708b3745..0ea3ca09c689 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c @@ -551,6 +551,28 @@ static void emac_mac_start(struct emac_adapter *adpt) mac &= ~(HUGEN | VLAN_STRIP | TPAUSE | SIMR | HUGE | MULTI_ALL | DEBUG_MODE | SINGLE_PAUSE_MODE); + /* Enable single-pause-frame mode if requested. + * + * If enabled, the EMAC will send a single pause frame when the RX + * queue is full. This normally leads to packet loss because + * the pause frame disables the remote MAC only for 33ms (the quanta), + * and then the remote MAC continues sending packets even though + * the RX queue is still full. + * + * If disabled, the EMAC sends a pause frame every 31ms until the RX + * queue is no longer full. Normally, this is the preferred + * method of operation. However, when the system is hung (e.g. + * cores are halted), the EMAC interrupt handler is never called + * and so the RX queue fills up quickly and stays full. The resuling + * non-stop "flood" of pause frames sometimes has the effect of + * disabling nearby switches. In some cases, other nearby switches + * are also affected, shutting down the entire network. + * + * The user can enable or disable single-pause-frame mode + * via ethtool. + */ + mac |= adpt->single_pause_mode ? SINGLE_PAUSE_MODE : 0; + writel_relaxed(csr1, adpt->csr + EMAC_EMAC_WRAPPER_CSR1); writel_relaxed(mac, adpt->base + EMAC_MAC_CTRL); diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index 60850bfa3d32..759543512117 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -443,6 +443,9 @@ static void emac_init_adapter(struct emac_adapter *adpt) /* default to automatic flow control */ adpt->automatic = true; + + /* Disable single-pause-frame mode by default */ + adpt->single_pause_mode = false; } /* Get the clock */ diff --git a/drivers/net/ethernet/qualcomm/emac/emac.h b/drivers/net/ethernet/qualcomm/emac/emac.h index 8ee4ec6aef2e..d7c9f44209d4 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.h +++ b/drivers/net/ethernet/qualcomm/emac/emac.h @@ -363,6 +363,9 @@ struct emac_adapter { bool tx_flow_control; bool rx_flow_control; + /* True == use single-pause-frame mode. */ + bool single_pause_mode; + /* Ring parameter */ u8 tpd_burst; u8 rfd_burst; From 19cab8872692960535aa6d12e3a295ac51d1a648 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 20 Sep 2017 15:52:13 -0700 Subject: [PATCH 201/279] net: ethtool: Add back transceiver type Commit 3f1ac7a700d0 ("net: ethtool: add new ETHTOOL_xLINKSETTINGS API") deprecated the ethtool_cmd::transceiver field, which was fine in premise, except that the PHY library was actually using it to report the type of transceiver: internal or external. Use the first word of the reserved field to put this __u8 transceiver field back in. It is made read-only, and we don't expect the ETHTOOL_xLINKSETTINGS API to be doing anything with this anyway, so this is mostly for the legacy path where we do: ethtool_get_settings() -> dev->ethtool_ops->get_link_ksettings() -> convert_link_ksettings_to_legacy_settings() to have no information loss compared to the legacy get_settings API. Fixes: 3f1ac7a700d0 ("net: ethtool: add new ETHTOOL_xLINKSETTINGS API") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 6 +++++- net/core/ethtool.c | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 9c041dae8e2c..5bd1b1de4ea0 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1753,6 +1753,8 @@ enum ethtool_reset_flags { * %ethtool_link_mode_bit_indices for the link modes, and other * link features that the link partner advertised through * autonegotiation; 0 if unknown or not applicable. Read-only. + * @transceiver: Used to distinguish different possible PHY types, + * reported consistently by PHYLIB. Read-only. * * If autonegotiation is disabled, the speed and @duplex represent the * fixed link mode and are writable if the driver supports multiple @@ -1804,7 +1806,9 @@ struct ethtool_link_settings { __u8 eth_tp_mdix; __u8 eth_tp_mdix_ctrl; __s8 link_mode_masks_nwords; - __u32 reserved[8]; + __u8 transceiver; + __u8 reserved1[3]; + __u32 reserved[7]; __u32 link_mode_masks[0]; /* layout of link_mode_masks fields: * __u32 map_supported[link_mode_masks_nwords]; diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 6a582ae4c5d9..3228411ada0f 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -525,6 +525,8 @@ convert_link_ksettings_to_legacy_settings( = link_ksettings->base.eth_tp_mdix; legacy_settings->eth_tp_mdix_ctrl = link_ksettings->base.eth_tp_mdix_ctrl; + legacy_settings->transceiver + = link_ksettings->base.transceiver; return retval; } From ceb628134a75564d7bfa8e4ef902e6e588339e11 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 20 Sep 2017 15:52:14 -0700 Subject: [PATCH 202/279] net: phy: Keep reporting transceiver type With commit 2d55173e71b0 ("phy: add generic function to support ksetting support"), we lost the ability to report the transceiver type like we used to. Now that we have added back the transceiver type to ethtool_link_settings, we can report it back like we used to and have no loss of information. Fixes: 3f1ac7a700d0 ("net: ethtool: add new ETHTOOL_xLINKSETTINGS API") Fixes: 2d55173e71b0 ("phy: add generic function to support ksetting support") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index e842d2cd1ee7..2b1e67bc1e73 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -373,7 +373,8 @@ void phy_ethtool_ksettings_get(struct phy_device *phydev, cmd->base.port = PORT_BNC; else cmd->base.port = PORT_MII; - + cmd->base.transceiver = phy_is_internal(phydev) ? + XCVR_INTERNAL : XCVR_EXTERNAL; cmd->base.phy_address = phydev->mdio.addr; cmd->base.autoneg = phydev->autoneg; cmd->base.eth_tp_mdix_ctrl = phydev->mdix_ctrl; From 8a7ffeb795f864dd605b579c05934cba95dc8ad3 Mon Sep 17 00:00:00 2001 From: Nisar Sayed Date: Thu, 21 Sep 2017 02:36:36 +0530 Subject: [PATCH 203/279] lan78xx: Fix for eeprom read/write when device auto suspend Fix for eeprom read/write when device auto suspend Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver") Signed-off-by: Nisar Sayed Signed-off-by: David S. Miller --- drivers/net/usb/lan78xx.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index b99a7fb09f8e..fcf85ae37435 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1265,30 +1265,46 @@ static int lan78xx_ethtool_get_eeprom(struct net_device *netdev, struct ethtool_eeprom *ee, u8 *data) { struct lan78xx_net *dev = netdev_priv(netdev); + int ret; + + ret = usb_autopm_get_interface(dev->intf); + if (ret) + return ret; ee->magic = LAN78XX_EEPROM_MAGIC; - return lan78xx_read_raw_eeprom(dev, ee->offset, ee->len, data); + ret = lan78xx_read_raw_eeprom(dev, ee->offset, ee->len, data); + + usb_autopm_put_interface(dev->intf); + + return ret; } static int lan78xx_ethtool_set_eeprom(struct net_device *netdev, struct ethtool_eeprom *ee, u8 *data) { struct lan78xx_net *dev = netdev_priv(netdev); + int ret; + + ret = usb_autopm_get_interface(dev->intf); + if (ret) + return ret; /* Allow entire eeprom update only */ if ((ee->magic == LAN78XX_EEPROM_MAGIC) && (ee->offset == 0) && (ee->len == 512) && (data[0] == EEPROM_INDICATOR)) - return lan78xx_write_raw_eeprom(dev, ee->offset, ee->len, data); + ret = lan78xx_write_raw_eeprom(dev, ee->offset, ee->len, data); else if ((ee->magic == LAN78XX_OTP_MAGIC) && (ee->offset == 0) && (ee->len == 512) && (data[0] == OTP_INDICATOR_1)) - return lan78xx_write_raw_otp(dev, ee->offset, ee->len, data); + ret = lan78xx_write_raw_otp(dev, ee->offset, ee->len, data); - return -EINVAL; + usb_autopm_put_interface(dev->intf); + + return ret; } static void lan78xx_get_strings(struct net_device *netdev, u32 stringset, From c077682282dd34c75e8477c22dffe7c9aebc6e98 Mon Sep 17 00:00:00 2001 From: Nisar Sayed Date: Thu, 21 Sep 2017 02:36:37 +0530 Subject: [PATCH 204/279] lan78xx: Allow EEPROM write for less than MAX_EEPROM_SIZE Allow EEPROM write for less than MAX_EEPROM_SIZE Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver") Signed-off-by: Nisar Sayed Signed-off-by: David S. Miller --- drivers/net/usb/lan78xx.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index fcf85ae37435..f8c63eec8353 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1290,11 +1290,10 @@ static int lan78xx_ethtool_set_eeprom(struct net_device *netdev, if (ret) return ret; - /* Allow entire eeprom update only */ - if ((ee->magic == LAN78XX_EEPROM_MAGIC) && - (ee->offset == 0) && - (ee->len == 512) && - (data[0] == EEPROM_INDICATOR)) + /* Invalid EEPROM_INDICATOR at offset zero will result in a failure + * to load data from EEPROM + */ + if (ee->magic == LAN78XX_EEPROM_MAGIC) ret = lan78xx_write_raw_eeprom(dev, ee->offset, ee->len, data); else if ((ee->magic == LAN78XX_OTP_MAGIC) && (ee->offset == 0) && From e365280521029c9366bab038915274ddaa1b7195 Mon Sep 17 00:00:00 2001 From: Nisar Sayed Date: Thu, 21 Sep 2017 02:36:38 +0530 Subject: [PATCH 205/279] lan78xx: Use default values loaded from EEPROM/OTP after reset Use default value of auto duplex and auto speed values loaded from EEPROM/OTP after reset. The LAN78xx allows platform configurations to be loaded from EEPROM/OTP. Ex: When external phy is connected, the MAC can be configured to have correct auto speed, auto duplex, auto polarity configured from the EEPROM/OTP. Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver") Signed-off-by: Nisar Sayed Signed-off-by: David S. Miller --- drivers/net/usb/lan78xx.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index f8c63eec8353..0161f77641fa 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -2449,7 +2449,6 @@ static int lan78xx_reset(struct lan78xx_net *dev) /* LAN7801 only has RGMII mode */ if (dev->chipid == ID_REV_CHIP_ID_7801_) buf &= ~MAC_CR_GMII_EN_; - buf |= MAC_CR_AUTO_DUPLEX_ | MAC_CR_AUTO_SPEED_; ret = lan78xx_write_reg(dev, MAC_CR, buf); ret = lan78xx_read_reg(dev, MAC_TX, &buf); From f0ef1f4f2b772c0a1c8b35a6ae3edf974cc110dd Mon Sep 17 00:00:00 2001 From: Thomas Meyer Date: Thu, 21 Sep 2017 08:24:27 +0200 Subject: [PATCH 206/279] net: stmmac: Cocci spatch "of_table" Make sure (of/i2c/platform)_device_id tables are NULL terminated. Found by coccinelle spatch "misc/of_table.cocci" Signed-off-by: Thomas Meyer Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index a366b3747eeb..8a280b48e3a9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -315,6 +315,7 @@ static int stmmac_dt_phy(struct plat_stmmacenet_data *plat, { .compatible = "allwinner,sun8i-h3-emac" }, { .compatible = "allwinner,sun8i-v3s-emac" }, { .compatible = "allwinner,sun50i-a64-emac" }, + {}, }; /* If phy-handle property is passed from DT, use it as the PHY */ From 09579ac803a3638344b8544b5940793d5358673e Mon Sep 17 00:00:00 2001 From: Hans Wippel Date: Thu, 21 Sep 2017 09:16:26 +0200 Subject: [PATCH 207/279] net/smc: add missing dev_put In the infiniband part, SMC currently uses get_netdev which calls dev_hold on the returned net device. However, the SMC code never calls dev_put on that net device resulting in a wrong reference count. This patch adds a dev_put after the usage of the net device to fix the issue. Signed-off-by: Hans Wippel Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_ib.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 547e0e113b17..0b5852299158 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -380,6 +380,7 @@ static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport) ndev = smcibdev->ibdev->get_netdev(smcibdev->ibdev, ibport); if (ndev) { memcpy(&smcibdev->mac, ndev->dev_addr, ETH_ALEN); + dev_put(ndev); } else if (!rc) { memcpy(&smcibdev->mac[ibport - 1][0], &smcibdev->gid[ibport - 1].raw[8], 3); From 846e344eb7229018457d6d6fc1ab0cc0a167692f Mon Sep 17 00:00:00 2001 From: Hans Wippel Date: Thu, 21 Sep 2017 09:16:27 +0200 Subject: [PATCH 208/279] net/smc: add receive timeout check The SMC receive function currently lacks a timeout check under the condition that no data were received and no data are available. This patch adds such a check. Signed-off-by: Hans Wippel Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_rx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c index b17a333e9bb0..3e631ae4b6b6 100644 --- a/net/smc/smc_rx.c +++ b/net/smc/smc_rx.c @@ -148,6 +148,8 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, size_t len, read_done = sock_intr_errno(timeo); break; } + if (!timeo) + return -EAGAIN; } if (!atomic_read(&conn->bytes_to_rcv)) { From 731b008560e6dfaf5fb297543f17bbe9bb868f3c Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 21 Sep 2017 09:16:28 +0200 Subject: [PATCH 209/279] net/smc: take RCU read lock for routing cache lookup smc_netinfo_by_tcpsk() looks up the routing cache. Such a lookup requires protection by an RCU read lock. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 8c6d24b2995d..2e8d2dabac0c 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -282,6 +282,7 @@ int smc_netinfo_by_tcpsk(struct socket *clcsock, __be32 *subnet, u8 *prefix_len) { struct dst_entry *dst = sk_dst_get(clcsock->sk); + struct in_device *in_dev; struct sockaddr_in addr; int rc = -ENOENT; int len; @@ -298,14 +299,17 @@ int smc_netinfo_by_tcpsk(struct socket *clcsock, /* get address to which the internal TCP socket is bound */ kernel_getsockname(clcsock, (struct sockaddr *)&addr, &len); /* analyze IPv4 specific data of net_device belonging to TCP socket */ - for_ifa(dst->dev->ip_ptr) { - if (ifa->ifa_address != addr.sin_addr.s_addr) + rcu_read_lock(); + in_dev = __in_dev_get_rcu(dst->dev); + for_ifa(in_dev) { + if (!inet_ifa_match(addr.sin_addr.s_addr, ifa)) continue; *prefix_len = inet_mask_len(ifa->ifa_mask); *subnet = ifa->ifa_address & ifa->ifa_mask; rc = 0; break; - } endfor_ifa(dst->dev->ip_ptr); + } endfor_ifa(in_dev); + rcu_read_unlock(); out_rel: dst_release(dst); From a6832c3acdb2ceb099ec3c385777fbaa6d5a5fd6 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 21 Sep 2017 09:16:29 +0200 Subject: [PATCH 210/279] net/smc: adjust net_device refcount smc_pnet_fill_entry() uses dev_get_by_name() adding a refcount to ndev. The following smc_pnet_enter() has to reduce the refcount if the entry to be added exists already in the pnet table. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_pnet.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 78f7af28ae4f..31f8453c25c5 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -181,8 +181,10 @@ static int smc_pnet_enter(struct smc_pnetentry *new_pnetelem) sizeof(new_pnetelem->ndev->name)) || smc_pnet_same_ibname(pnetelem, new_pnetelem->smcibdev->ibdev->name, - new_pnetelem->ib_port)) + new_pnetelem->ib_port)) { + dev_put(pnetelem->ndev); goto found; + } } list_add_tail(&new_pnetelem->list, &smc_pnettable.pnetlist); rc = 0; From 8301fa44b41b1ca46a0547809eb173112559dc51 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 21 Sep 2017 09:16:30 +0200 Subject: [PATCH 211/279] net/smc: adapt send request completion notification The solicited flag is meaningful for the receive completion queue. Ask for next work completion of any type on the send queue. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_wr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index ab56bda66783..525d91e0d57e 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -244,7 +244,7 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv) int rc; ib_req_notify_cq(link->smcibdev->roce_cq_send, - IB_CQ_SOLICITED_MASK | IB_CQ_REPORT_MISSED_EVENTS); + IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); pend = container_of(priv, struct smc_wr_tx_pend, priv); rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx], &failed_wr); From 5bc11ddbdf7fc6681db5c3f9a92cdee0f19cee1e Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 21 Sep 2017 09:16:31 +0200 Subject: [PATCH 212/279] net/smc: longer delay for client link group removal Client link group creation always follows the server linkgroup creation. If peer creates a new server link group, client has to create a new client link group. If peer reuses a server link group for a new connection, client has to reuse its client link group as well. This patch introduces a longer delay for client link group removal to make sure this link group still exists, once the peer decides to reuse a server link group. This avoids out-of-sync conditions for link groups. If already scheduled, modify the delay. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_core.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 1a16d51e2330..20b66e79c5d6 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -25,8 +25,9 @@ #include "smc_cdc.h" #include "smc_close.h" -#define SMC_LGR_NUM_INCR 256 -#define SMC_LGR_FREE_DELAY (600 * HZ) +#define SMC_LGR_NUM_INCR 256 +#define SMC_LGR_FREE_DELAY_SERV (600 * HZ) +#define SMC_LGR_FREE_DELAY_CLNT (SMC_LGR_FREE_DELAY_SERV + 10) static u32 smc_lgr_num; /* unique link group number */ @@ -107,8 +108,15 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn) __smc_lgr_unregister_conn(conn); } write_unlock_bh(&lgr->conns_lock); - if (reduced && !lgr->conns_num) - schedule_delayed_work(&lgr->free_work, SMC_LGR_FREE_DELAY); + if (!reduced || lgr->conns_num) + return; + /* client link group creation always follows the server link group + * creation. For client use a somewhat higher removal delay time, + * otherwise there is a risk of out-of-sync link groups. + */ + mod_delayed_work(system_wq, &lgr->free_work, + lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT : + SMC_LGR_FREE_DELAY_SERV); } static void smc_lgr_free_work(struct work_struct *work) From bfbedfd38378c1ad9df84469403d69c17b074b66 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 21 Sep 2017 09:16:32 +0200 Subject: [PATCH 213/279] net/smc: terminate link group if out-of-sync is received An out-of-sync condition can just be detected by the client. If the server receives a CLC DECLINE message indicating an out-of-sync condition for the link groups, the server must clean up the out-of-sync link group. There is no need for an extra third parameter in smc_clc_send_decline(). Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 6 ++---- net/smc/smc_clc.c | 10 +++++----- net/smc/smc_clc.h | 3 +-- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 2e8d2dabac0c..745f145d4c4d 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -513,7 +513,7 @@ decline_rdma: /* RDMA setup failed, switch back to TCP */ smc->use_fallback = true; if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) { - rc = smc_clc_send_decline(smc, reason_code, 0); + rc = smc_clc_send_decline(smc, reason_code); if (rc < sizeof(struct smc_clc_msg_decline)) goto out_err; } @@ -808,8 +808,6 @@ static void smc_listen_work(struct work_struct *work) rc = local_contact; if (rc == -ENOMEM) reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/ - else if (rc == -ENOLINK) - reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */ goto decline_rdma; } link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK]; @@ -903,7 +901,7 @@ decline_rdma: smc_conn_free(&new_smc->conn); new_smc->use_fallback = true; if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) { - rc = smc_clc_send_decline(new_smc, reason_code, 0); + rc = smc_clc_send_decline(new_smc, reason_code); if (rc < sizeof(struct smc_clc_msg_decline)) goto out_err; } diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 3934913ab835..b7dd2743fb5c 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -95,9 +95,10 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, } if (clcm->type == SMC_CLC_DECLINE) { reason_code = SMC_CLC_DECL_REPLY; - if (ntohl(((struct smc_clc_msg_decline *)buf)->peer_diagnosis) - == SMC_CLC_DECL_SYNCERR) + if (((struct smc_clc_msg_decline *)buf)->hdr.flag) { smc->conn.lgr->sync_err = true; + smc_lgr_terminate(smc->conn.lgr); + } } out: @@ -105,8 +106,7 @@ out: } /* send CLC DECLINE message across internal TCP socket */ -int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, - u8 out_of_sync) +int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info) { struct smc_clc_msg_decline dclc; struct msghdr msg; @@ -118,7 +118,7 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, dclc.hdr.type = SMC_CLC_DECLINE; dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline)); dclc.hdr.version = SMC_CLC_V1; - dclc.hdr.flag = out_of_sync ? 1 : 0; + dclc.hdr.flag = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? 1 : 0; memcpy(dclc.id_for_peer, local_systemid, sizeof(local_systemid)); dclc.peer_diagnosis = htonl(peer_diag_info); memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index 13db8ce177c9..1c55414041d4 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -106,8 +106,7 @@ struct smc_ib_device; int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, u8 expected_type); -int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, - u8 out_of_sync); +int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info); int smc_clc_send_proposal(struct smc_sock *smc, struct smc_ib_device *smcibdev, u8 ibport); int smc_clc_send_confirm(struct smc_sock *smc); From 18e537cd58e8d6932719bfa79cb96a1fbc639199 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 21 Sep 2017 09:16:33 +0200 Subject: [PATCH 214/279] net/smc: introduce a delay The number of outstanding work requests is limited. If all work requests are in use, tx processing is postponed to another scheduling of the tx worker. Switch to a delayed worker to have a gap for tx completion queue events before the next retry. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc.h | 2 +- net/smc/smc_close.c | 12 +++++++----- net/smc/smc_tx.c | 12 ++++++++---- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/net/smc/smc.h b/net/smc/smc.h index 6e44313e4467..0ccd6fa387ad 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -149,7 +149,7 @@ struct smc_connection { atomic_t sndbuf_space; /* remaining space in sndbuf */ u16 tx_cdc_seq; /* sequence # for CDC send */ spinlock_t send_lock; /* protect wr_sends */ - struct work_struct tx_work; /* retry of smc_cdc_msg_send */ + struct delayed_work tx_work; /* retry of smc_cdc_msg_send */ struct smc_host_cdc_msg local_rx_ctrl; /* filled during event_handl. * .prod cf. TCP rcv_nxt diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index 3c2e166b5d22..5201bc103bd8 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -208,7 +208,7 @@ again: case SMC_ACTIVE: smc_close_stream_wait(smc, timeout); release_sock(sk); - cancel_work_sync(&conn->tx_work); + cancel_delayed_work_sync(&conn->tx_work); lock_sock(sk); if (sk->sk_state == SMC_ACTIVE) { /* send close request */ @@ -234,7 +234,7 @@ again: if (!smc_cdc_rxed_any_close(conn)) smc_close_stream_wait(smc, timeout); release_sock(sk); - cancel_work_sync(&conn->tx_work); + cancel_delayed_work_sync(&conn->tx_work); lock_sock(sk); if (sk->sk_err != ECONNABORTED) { /* confirm close from peer */ @@ -263,7 +263,9 @@ again: /* peer sending PeerConnectionClosed will cause transition */ break; case SMC_PROCESSABORT: - cancel_work_sync(&conn->tx_work); + release_sock(sk); + cancel_delayed_work_sync(&conn->tx_work); + lock_sock(sk); smc_close_abort(conn); sk->sk_state = SMC_CLOSED; smc_close_wait_tx_pends(smc); @@ -425,7 +427,7 @@ again: case SMC_ACTIVE: smc_close_stream_wait(smc, timeout); release_sock(sk); - cancel_work_sync(&conn->tx_work); + cancel_delayed_work_sync(&conn->tx_work); lock_sock(sk); /* send close wr request */ rc = smc_close_wr(conn); @@ -439,7 +441,7 @@ again: if (!smc_cdc_rxed_any_close(conn)) smc_close_stream_wait(smc, timeout); release_sock(sk); - cancel_work_sync(&conn->tx_work); + cancel_delayed_work_sync(&conn->tx_work); lock_sock(sk); /* confirm close from peer */ rc = smc_close_wr(conn); diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 3c656beb8820..3866573288dd 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -24,6 +24,8 @@ #include "smc_cdc.h" #include "smc_tx.h" +#define SMC_TX_WORK_DELAY HZ + /***************************** sndbuf producer *******************************/ /* callback implementation for sk.sk_write_space() @@ -406,7 +408,8 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn) goto out_unlock; } rc = 0; - schedule_work(&conn->tx_work); + schedule_delayed_work(&conn->tx_work, + SMC_TX_WORK_DELAY); } goto out_unlock; } @@ -430,7 +433,7 @@ out_unlock: */ static void smc_tx_work(struct work_struct *work) { - struct smc_connection *conn = container_of(work, + struct smc_connection *conn = container_of(to_delayed_work(work), struct smc_connection, tx_work); struct smc_sock *smc = container_of(conn, struct smc_sock, conn); @@ -468,7 +471,8 @@ void smc_tx_consumer_update(struct smc_connection *conn) if (!rc) rc = smc_cdc_msg_send(conn, wr_buf, pend); if (rc < 0) { - schedule_work(&conn->tx_work); + schedule_delayed_work(&conn->tx_work, + SMC_TX_WORK_DELAY); return; } smc_curs_write(&conn->rx_curs_confirmed, @@ -487,6 +491,6 @@ void smc_tx_consumer_update(struct smc_connection *conn) void smc_tx_init(struct smc_sock *smc) { smc->sk.sk_write_space = smc_tx_write_space; - INIT_WORK(&smc->conn.tx_work, smc_tx_work); + INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work); spin_lock_init(&smc->conn.send_lock); } From 8c96feeeb39ba0b89c6121f71e8f7aa2a4d46671 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 21 Sep 2017 09:16:34 +0200 Subject: [PATCH 215/279] net/smc: no close wait in case of process shut down Usually socket closing is delayed if there is still data available in the send buffer to be transmitted. If a process is killed, the delay should be avoided. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_close.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index 5201bc103bd8..f0d16fb825f7 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -174,15 +174,15 @@ int smc_close_active(struct smc_sock *smc) { struct smc_cdc_conn_state_flags *txflags = &smc->conn.local_tx_ctrl.conn_state_flags; - long timeout = SMC_MAX_STREAM_WAIT_TIMEOUT; struct smc_connection *conn = &smc->conn; struct sock *sk = &smc->sk; int old_state; + long timeout; int rc = 0; - if (sock_flag(sk, SOCK_LINGER) && - !(current->flags & PF_EXITING)) - timeout = sk->sk_lingertime; + timeout = current->flags & PF_EXITING ? + 0 : sock_flag(sk, SOCK_LINGER) ? + sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT; again: old_state = sk->sk_state; @@ -413,13 +413,14 @@ void smc_close_sock_put_work(struct work_struct *work) int smc_close_shutdown_write(struct smc_sock *smc) { struct smc_connection *conn = &smc->conn; - long timeout = SMC_MAX_STREAM_WAIT_TIMEOUT; struct sock *sk = &smc->sk; int old_state; + long timeout; int rc = 0; - if (sock_flag(sk, SOCK_LINGER)) - timeout = sk->sk_lingertime; + timeout = current->flags & PF_EXITING ? + 0 : sock_flag(sk, SOCK_LINGER) ? + sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT; again: old_state = sk->sk_state; From e8b95728f724797f958912fd9b765a695595d3a6 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 1 Sep 2017 17:13:43 -0700 Subject: [PATCH 216/279] Input: uinput - avoid FF flush when destroying device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Normally, when input device supporting force feedback effects is being destroyed, we try to "flush" currently playing effects, so that the physical device does not continue vibrating (or executing other effects). Unfortunately this does not work well for uinput as flushing of the effects deadlocks with the destroy action: - if device is being destroyed because the file descriptor is being closed, then there is noone to even service FF requests; - if device is being destroyed because userspace sent UI_DEV_DESTROY, while theoretically it could be possible to service FF requests, userspace is unlikely to do so (they'd need to make sure FF handling happens on a separate thread) even if kernel solves the issue with FF ioctls deadlocking with UI_DEV_DESTROY ioctl on udev->mutex. To avoid lockups like the one below, let's install a custom input device flush handler, and avoid trying to flush force feedback effects when we destroying the device, and instead rely on uinput to shut off the device properly. NMI watchdog: Watchdog detected hard LOCKUP on cpu 3 ... <> [] _raw_spin_lock_irqsave+0x37/0x40 [] complete+0x1d/0x50 [] uinput_request_done+0x3c/0x40 [uinput] [] uinput_request_submit.part.7+0x47/0xb0 [uinput] [] uinput_dev_erase_effect+0x5b/0x76 [uinput] [] erase_effect+0xad/0xf0 [] flush_effects+0x4d/0x90 [] input_flush_device+0x40/0x60 [] evdev_cleanup+0xac/0xc0 [] evdev_disconnect+0x2b/0x60 [] __input_unregister_device+0xac/0x150 [] input_unregister_device+0x47/0x70 [] uinput_destroy_device+0xb5/0xc0 [uinput] [] uinput_ioctl_handler.isra.9+0x65e/0x740 [uinput] [] ? do_futex+0x12b/0xad0 [] uinput_ioctl+0x18/0x20 [uinput] [] do_vfs_ioctl+0x298/0x480 [] ? security_file_ioctl+0x43/0x60 [] SyS_ioctl+0x79/0x90 [] entry_SYSCALL_64_fastpath+0x12/0x71 Reported-by: Rodrigo Rivas Costa Reported-by: Clément VUCHENER Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=193741 Signed-off-by: Dmitry Torokhov --- drivers/input/ff-core.c | 13 ++++++++++--- drivers/input/misc/uinput.c | 18 ++++++++++++++++++ include/linux/input.h | 1 + 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/drivers/input/ff-core.c b/drivers/input/ff-core.c index 8f2042432c85..66a46c84e28f 100644 --- a/drivers/input/ff-core.c +++ b/drivers/input/ff-core.c @@ -237,9 +237,15 @@ int input_ff_erase(struct input_dev *dev, int effect_id, struct file *file) EXPORT_SYMBOL_GPL(input_ff_erase); /* - * flush_effects - erase all effects owned by a file handle + * input_ff_flush - erase all effects owned by a file handle + * @dev: input device to erase effect from + * @file: purported owner of the effects + * + * This function erases all force-feedback effects associated with + * the given owner from specified device. Note that @file may be %NULL, + * in which case all effects will be erased. */ -static int flush_effects(struct input_dev *dev, struct file *file) +int input_ff_flush(struct input_dev *dev, struct file *file) { struct ff_device *ff = dev->ff; int i; @@ -255,6 +261,7 @@ static int flush_effects(struct input_dev *dev, struct file *file) return 0; } +EXPORT_SYMBOL_GPL(input_ff_flush); /** * input_ff_event() - generic handler for force-feedback events @@ -343,7 +350,7 @@ int input_ff_create(struct input_dev *dev, unsigned int max_effects) mutex_init(&ff->mutex); dev->ff = ff; - dev->flush = flush_effects; + dev->flush = input_ff_flush; dev->event = input_ff_event; __set_bit(EV_FF, dev->evbit); diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c index 022be0e22eba..2cff40be8860 100644 --- a/drivers/input/misc/uinput.c +++ b/drivers/input/misc/uinput.c @@ -230,6 +230,18 @@ static int uinput_dev_erase_effect(struct input_dev *dev, int effect_id) return uinput_request_submit(udev, &request); } +static int uinput_dev_flush(struct input_dev *dev, struct file *file) +{ + /* + * If we are called with file == NULL that means we are tearing + * down the device, and therefore we can not handle FF erase + * requests: either we are handling UI_DEV_DESTROY (and holding + * the udev->mutex), or the file descriptor is closed and there is + * nobody on the other side anymore. + */ + return file ? input_ff_flush(dev, file) : 0; +} + static void uinput_destroy_device(struct uinput_device *udev) { const char *name, *phys; @@ -297,6 +309,12 @@ static int uinput_create_device(struct uinput_device *udev) dev->ff->playback = uinput_dev_playback; dev->ff->set_gain = uinput_dev_set_gain; dev->ff->set_autocenter = uinput_dev_set_autocenter; + /* + * The standard input_ff_flush() implementation does + * not quite work for uinput as we can't reasonably + * handle FF requests during device teardown. + */ + dev->flush = uinput_dev_flush; } error = input_register_device(udev->dev); diff --git a/include/linux/input.h b/include/linux/input.h index a65e3b24fb18..fb5e23c7ed98 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -529,6 +529,7 @@ int input_ff_event(struct input_dev *dev, unsigned int type, unsigned int code, int input_ff_upload(struct input_dev *dev, struct ff_effect *effect, struct file *file); int input_ff_erase(struct input_dev *dev, int effect_id, struct file *file); +int input_ff_flush(struct input_dev *dev, struct file *file); int input_ff_create_memless(struct input_dev *dev, void *data, int (*play_effect)(struct input_dev *, void *, struct ff_effect *)); From 6b4877c7bdc6ae39ce03716df7caeecf204697eb Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 6 Sep 2017 16:22:59 -0700 Subject: [PATCH 217/279] Input: uinput - avoid crash when sending FF request to device going away If FF request comes in while uinput device is going away, uinput_request_send() will fail with -ENODEV, and uinput_request_submit() will attempt to mark the slot as unused by calling uinput_request_done(). Unfortunately in this case we haven't initialized request->done completion yet, and we get a crash: [ 39.402036] BUG: spinlock bad magic on CPU#1, fftest/3108 [ 39.402046] lock: 0xffff88006a93bb00, .magic: 00000000, .owner: /39, .owner_cpu: 1217155072 [ 39.402055] CPU: 1 PID: 3108 Comm: fftest Tainted: G W 4.13.0+ #15 [ 39.402059] Hardware name: LENOVO 20HQS0EG02/20HQS0EG02, BIOS N1MET37W (1.22 ) 07/04/2017 [ 39.402064] 0000000000000086 f0fad82f3ceaa120 ffff88006a93b9a0 ffffffff9de941bb [ 39.402077] ffff88026df8ae00 ffff88006a93bb00 ffff88006a93b9c0 ffffffff9dca62b7 [ 39.402088] ffff88006a93bb00 ffff88006a93baf8 ffff88006a93b9e0 ffffffff9dca62e7 [ 39.402099] Call Trace: [ 39.402112] [] dump_stack+0x4d/0x63 [ 39.402123] [] spin_dump+0x97/0x9c [ 39.402130] [] spin_bug+0x2b/0x2d [ 39.402138] [] do_raw_spin_lock+0x28/0xfd [ 39.402147] [] _raw_spin_lock_irqsave+0x19/0x1f [ 39.402154] [] complete+0x1d/0x48 [ 39.402162] [] 0xffffffffc04f30af [ 39.402167] [] 0xffffffffc04f468c [ 39.402177] [] ? __slab_free+0x22f/0x359 [ 39.402184] [] ? tk_clock_read+0xc/0xe [ 39.402189] [] 0xffffffffc04f471f [ 39.402195] [] ? __wake_up+0x44/0x4b [ 39.402200] [] ? 0xffffffffc04f3240 [ 39.402207] [] erase_effect+0xa1/0xd2 [ 39.402214] [] input_ff_flush+0x43/0x5c [ 39.402219] [] 0xffffffffc04f32ad [ 39.402227] [] input_flush_device+0x3d/0x51 [ 39.402234] [] evdev_flush+0x49/0x5c [ 39.402243] [] filp_close+0x3f/0x65 [ 39.402253] [] put_files_struct+0x66/0xc1 [ 39.402261] [] exit_files+0x47/0x4e [ 39.402270] [] do_exit+0x483/0x969 [ 39.402278] [] ? recalc_sigpending_tsk+0x3d/0x44 [ 39.402285] [] do_group_exit+0x42/0xb0 [ 39.402293] [] get_signal+0x58d/0x5bf [ 39.402300] [] do_signal+0x37/0x53e [ 39.402307] [] ? evdev_ioctl_handler+0xac8/0xb04 [ 39.402314] [] ? evdev_ioctl+0x10/0x12 [ 39.402321] [] ? do_vfs_ioctl+0x42e/0x501 [ 39.402328] [] prepare_exit_to_usermode+0x66/0x90 [ 39.402333] [] syscall_return_slowpath+0xe3/0xec [ 39.402339] [] int_ret_from_sys_call+0x25/0x8f While we could solve this by simply initializing the completion earlier, we are better off rearranging the code a bit so we avoid calling complete() on requests that we did not send out. This patch consolidates marking request slots as free in one place (in uinput_request_submit(), the same place where we acquire them) and having everyone else simply signal completion of the requests. Fixes: 00ce756ce53a ("Input: uinput - mark failed submission requests as free") Signed-off-by: Dmitry Torokhov --- drivers/input/misc/uinput.c | 39 ++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c index 2cff40be8860..443151de90c6 100644 --- a/drivers/input/misc/uinput.c +++ b/drivers/input/misc/uinput.c @@ -98,14 +98,15 @@ static int uinput_request_reserve_slot(struct uinput_device *udev, uinput_request_alloc_id(udev, request)); } -static void uinput_request_done(struct uinput_device *udev, - struct uinput_request *request) +static void uinput_request_release_slot(struct uinput_device *udev, + unsigned int id) { /* Mark slot as available */ - udev->requests[request->id] = NULL; - wake_up(&udev->requests_waitq); + spin_lock(&udev->requests_lock); + udev->requests[id] = NULL; + spin_unlock(&udev->requests_lock); - complete(&request->done); + wake_up(&udev->requests_waitq); } static int uinput_request_send(struct uinput_device *udev, @@ -138,20 +139,22 @@ static int uinput_request_send(struct uinput_device *udev, static int uinput_request_submit(struct uinput_device *udev, struct uinput_request *request) { - int error; + int retval; - error = uinput_request_reserve_slot(udev, request); - if (error) - return error; + retval = uinput_request_reserve_slot(udev, request); + if (retval) + return retval; - error = uinput_request_send(udev, request); - if (error) { - uinput_request_done(udev, request); - return error; - } + retval = uinput_request_send(udev, request); + if (retval) + goto out; wait_for_completion(&request->done); - return request->retval; + retval = request->retval; + + out: + uinput_request_release_slot(udev, request->id); + return retval; } /* @@ -169,7 +172,7 @@ static void uinput_flush_requests(struct uinput_device *udev) request = udev->requests[i]; if (request) { request->retval = -ENODEV; - uinput_request_done(udev, request); + complete(&request->done); } } @@ -957,7 +960,7 @@ static long uinput_ioctl_handler(struct file *file, unsigned int cmd, } req->retval = ff_up.retval; - uinput_request_done(udev, req); + complete(&req->done); goto out; case UI_END_FF_ERASE: @@ -973,7 +976,7 @@ static long uinput_ioctl_handler(struct file *file, unsigned int cmd, } req->retval = ff_erase.retval; - uinput_request_done(udev, req); + complete(&req->done); goto out; } From 059fbe8b5171960bd5c2371bb327ac18733773de Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 21 Sep 2017 13:27:02 +0200 Subject: [PATCH 218/279] net: phy: Fix truncation of large IRQ numbers in phy_attached_print() Given NR_IRQS is 2048 on sparc64, and even 32784 on alpha, 3 digits is not enough to represent interrupt numbers on all architectures. Hence PHY interrupt numbers may be truncated during printing. Increase the buffer size from 4 to 8 bytes to fix this. Fixes: 5e369aefdce4818c ("net: stmmac: Delete dead code for MDIO registration") Signed-off-by: Geert Uytterhoeven Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 8cf0c5901f95..67f25ac29025 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -879,7 +879,7 @@ void phy_attached_print(struct phy_device *phydev, const char *fmt, ...) { const char *drv_name = phydev->drv ? phydev->drv->name : "unbound"; char *irq_str; - char irq_num[4]; + char irq_num[8]; switch(phydev->irq) { case PHY_POLL: From 222d7dbd258dad4cd5241c43ef818141fad5a87a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Sep 2017 09:15:46 -0700 Subject: [PATCH 219/279] net: prevent dst uses after free MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In linux-4.13, Wei worked hard to convert dst to a traditional refcounted model, removing GC. We now want to make sure a dst refcount can not transition from 0 back to 1. The problem here is that input path attached a not refcounted dst to an skb. Then later, because packet is forwarded and hits skb_dst_force() before exiting RCU section, we might try to take a refcount on one dst that is about to be freed, if another cpu saw 1 -> 0 transition in dst_release() and queued the dst for freeing after one RCU grace period. Lets unify skb_dst_force() and skb_dst_force_safe(), since we should always perform the complete check against dst refcount, and not assume it is not zero. Bugzilla : https://bugzilla.kernel.org/show_bug.cgi?id=197005 [ 989.919496] skb_dst_force+0x32/0x34 [ 989.919498] __dev_queue_xmit+0x1ad/0x482 [ 989.919501] ? eth_header+0x28/0xc6 [ 989.919502] dev_queue_xmit+0xb/0xd [ 989.919504] neigh_connected_output+0x9b/0xb4 [ 989.919507] ip_finish_output2+0x234/0x294 [ 989.919509] ? ipt_do_table+0x369/0x388 [ 989.919510] ip_finish_output+0x12c/0x13f [ 989.919512] ip_output+0x53/0x87 [ 989.919513] ip_forward_finish+0x53/0x5a [ 989.919515] ip_forward+0x2cb/0x3e6 [ 989.919516] ? pskb_trim_rcsum.part.9+0x4b/0x4b [ 989.919518] ip_rcv_finish+0x2e2/0x321 [ 989.919519] ip_rcv+0x26f/0x2eb [ 989.919522] ? vlan_do_receive+0x4f/0x289 [ 989.919523] __netif_receive_skb_core+0x467/0x50b [ 989.919526] ? tcp_gro_receive+0x239/0x239 [ 989.919529] ? inet_gro_receive+0x226/0x238 [ 989.919530] __netif_receive_skb+0x4d/0x5f [ 989.919532] netif_receive_skb_internal+0x5c/0xaf [ 989.919533] napi_gro_receive+0x45/0x81 [ 989.919536] ixgbe_poll+0xc8a/0xf09 [ 989.919539] ? kmem_cache_free_bulk+0x1b6/0x1f7 [ 989.919540] net_rx_action+0xf4/0x266 [ 989.919543] __do_softirq+0xa8/0x19d [ 989.919545] irq_exit+0x5d/0x6b [ 989.919546] do_IRQ+0x9c/0xb5 [ 989.919548] common_interrupt+0x93/0x93 [ 989.919548] Similarly dst_clone() can use dst_hold() helper to have additional debugging, as a follow up to commit 44ebe79149ff ("net: add debug atomic_inc_not_zero() in dst_hold()") In net-next we will convert dst atomic_t to refcount_t for peace of mind. Fixes: a4c2fd7f7891 ("net: remove DST_NOCACHE flag") Signed-off-by: Eric Dumazet Cc: Wei Wang Reported-by: Paweł Staszewski Bisected-by: Paweł Staszewski Acked-by: Wei Wang Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller --- include/net/dst.h | 22 ++++------------------ include/net/route.h | 2 +- include/net/sock.h | 2 +- 3 files changed, 6 insertions(+), 20 deletions(-) diff --git a/include/net/dst.h b/include/net/dst.h index 93568bd0a352..06a6765da074 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -271,7 +271,7 @@ static inline void dst_use_noref(struct dst_entry *dst, unsigned long time) static inline struct dst_entry *dst_clone(struct dst_entry *dst) { if (dst) - atomic_inc(&dst->__refcnt); + dst_hold(dst); return dst; } @@ -311,21 +311,6 @@ static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb __skb_dst_copy(nskb, oskb->_skb_refdst); } -/** - * skb_dst_force - makes sure skb dst is refcounted - * @skb: buffer - * - * If dst is not yet refcounted, let's do it - */ -static inline void skb_dst_force(struct sk_buff *skb) -{ - if (skb_dst_is_noref(skb)) { - WARN_ON(!rcu_read_lock_held()); - skb->_skb_refdst &= ~SKB_DST_NOREF; - dst_clone(skb_dst(skb)); - } -} - /** * dst_hold_safe - Take a reference on a dst if possible * @dst: pointer to dst entry @@ -339,16 +324,17 @@ static inline bool dst_hold_safe(struct dst_entry *dst) } /** - * skb_dst_force_safe - makes sure skb dst is refcounted + * skb_dst_force - makes sure skb dst is refcounted * @skb: buffer * * If dst is not yet refcounted and not destroyed, grab a ref on it. */ -static inline void skb_dst_force_safe(struct sk_buff *skb) +static inline void skb_dst_force(struct sk_buff *skb) { if (skb_dst_is_noref(skb)) { struct dst_entry *dst = skb_dst(skb); + WARN_ON(!rcu_read_lock_held()); if (!dst_hold_safe(dst)) dst = NULL; diff --git a/include/net/route.h b/include/net/route.h index 1b09a9368c68..57dfc6850d37 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -190,7 +190,7 @@ static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, rcu_read_lock(); err = ip_route_input_noref(skb, dst, src, tos, devin); if (!err) { - skb_dst_force_safe(skb); + skb_dst_force(skb); if (!skb_dst(skb)) err = -EINVAL; } diff --git a/include/net/sock.h b/include/net/sock.h index 03a362568357..a6b9a8d1a6df 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -856,7 +856,7 @@ void sk_stream_write_space(struct sock *sk); static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb) { /* dont let skb dst not refcounted, we are going to leave rcu lock */ - skb_dst_force_safe(skb); + skb_dst_force(skb); if (!sk->sk_backlog.tail) sk->sk_backlog.head = skb; From c0d05cde2a685cd6486390c62be684ce456d84d6 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 21 Sep 2017 11:20:58 +0100 Subject: [PATCH 220/279] iommu/of: Remove PCI host bridge node check of_pci_iommu_init() tries to be clever and stop its alias walk at the device represented by master_np, in case of weird PCI topologies where the bridge to the IOMMU and the rest of the system is not at the root. It turns out this is a bit short-sighted, since there are plenty of other callers of pci_for_each_dma_alias() which would also need the same behaviour in that situation, and the only platform so far with such a topology (Cavium ThunderX2) already solves it more generally via a PCI quirk. As this check is effectively redundant, and returning a boolean value as an int is a bit broken anyway, let's just get rid of it. Reported-by: Jean-Philippe Brucker Fixes: d87beb749281 ("iommu/of: Handle PCI aliases properly") Signed-off-by: Robin Murphy Tested-by: Jean-Philippe Brucker Signed-off-by: Joerg Roedel --- drivers/iommu/of_iommu.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index e60e3dba85a0..50947ebb6d17 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -157,10 +157,7 @@ static int of_pci_iommu_init(struct pci_dev *pdev, u16 alias, void *data) err = of_iommu_xlate(info->dev, &iommu_spec); of_node_put(iommu_spec.np); - if (err) - return err; - - return info->np == pdev->bus->dev.of_node; + return err; } const struct iommu_ops *of_iommu_configure(struct device *dev, From a88dc7ba15cd4f4ef5102b5185f8fa7ff86e54e1 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Wed, 20 Sep 2017 12:26:38 +0530 Subject: [PATCH 221/279] drivers/perf: arm_pmu_acpi: Release memory obtained by kasprintf Free memory region, if arm_pmu_acpi_probe is not successful. Acked-by: Will Deacon Signed-off-by: Arvind Yadav Signed-off-by: Catalin Marinas --- drivers/perf/arm_pmu_acpi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/perf/arm_pmu_acpi.c b/drivers/perf/arm_pmu_acpi.c index 0a9b78705ee8..3303dd8d8eb5 100644 --- a/drivers/perf/arm_pmu_acpi.c +++ b/drivers/perf/arm_pmu_acpi.c @@ -235,6 +235,7 @@ int arm_pmu_acpi_probe(armpmu_init_fn init_fn) ret = armpmu_register(pmu); if (ret) { pr_warn("Failed to register PMU for CPU%d\n", cpu); + kfree(pmu->name); return ret; } } From e6f9bc34d3779cb7b6a337afed5de8be3f0fab77 Mon Sep 17 00:00:00 2001 From: Alex Estrin Date: Thu, 31 Aug 2017 09:30:34 -0700 Subject: [PATCH 222/279] IB/core: Fix for core panic Build with the latest patches resulted in panic: 11384.486289] BUG: unable to handle kernel NULL pointer dereference at (null) [11384.486293] IP: (null) [11384.486295] PGD 0 [11384.486295] P4D 0 [11384.486296] [11384.486299] Oops: 0010 [#1] SMP ......... snip ...... [11384.486401] CPU: 0 PID: 968 Comm: kworker/0:1H Tainted: G W O 4.13.0-a-stream-20170825 #1 [11384.486402] Hardware name: Intel Corporation S2600WT2R/S2600WT2R, BIOS SE5C610.86B.01.01.0014.121820151719 12/18/2015 [11384.486418] Workqueue: ib-comp-wq ib_cq_poll_work [ib_core] [11384.486419] task: ffff880850579680 task.stack: ffffc90007fec000 [11384.486420] RIP: 0010: (null) [11384.486420] RSP: 0018:ffffc90007fef970 EFLAGS: 00010206 [11384.486421] RAX: ffff88084cfe8000 RBX: ffff88084dce4000 RCX: ffffc90007fef978 [11384.486422] RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff88084cfe8000 [11384.486422] RBP: ffffc90007fefab0 R08: 0000000000000000 R09: ffff88084dce4080 [11384.486423] R10: ffffffffa02d7f60 R11: 0000000000000000 R12: ffff88105af65a00 [11384.486423] R13: ffff88084dce4000 R14: 000000000000c000 R15: 000000000000c000 [11384.486424] FS: 0000000000000000(0000) GS:ffff88085f400000(0000) knlGS:0000000000000000 [11384.486425] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [11384.486425] CR2: 0000000000000000 CR3: 0000000001c09000 CR4: 00000000001406f0 [11384.486426] Call Trace: [11384.486431] ? is_valid_mcast_lid.isra.21+0xfb/0x110 [ib_core] [11384.486436] ib_attach_mcast+0x6f/0xa0 [ib_core] [11384.486441] ipoib_mcast_attach+0x81/0x190 [ib_ipoib] [11384.486443] ipoib_mcast_join_complete+0x354/0xb40 [ib_ipoib] [11384.486448] mcast_work_handler+0x330/0x6c0 [ib_core] [11384.486452] join_handler+0x101/0x220 [ib_core] [11384.486455] ib_sa_mcmember_rec_callback+0x54/0x80 [ib_core] [11384.486459] recv_handler+0x3a/0x60 [ib_core] [11384.486462] ib_mad_recv_done+0x423/0x9b0 [ib_core] [11384.486466] __ib_process_cq+0x5d/0xb0 [ib_core] [11384.486469] ib_cq_poll_work+0x20/0x60 [ib_core] [11384.486472] process_one_work+0x149/0x360 [11384.486474] worker_thread+0x4d/0x3c0 [11384.486487] kthread+0x109/0x140 [11384.486488] ? rescuer_thread+0x380/0x380 [11384.486489] ? kthread_park+0x60/0x60 [11384.486490] ? kthread_park+0x60/0x60 [11384.486493] ret_from_fork+0x25/0x30 [11384.486493] Code: Bad RIP value. [11384.486493] Code: Bad RIP value. [11384.486496] RIP: (null) RSP: ffffc90007fef970 [11384.486497] CR2: 0000000000000000 [11384.486531] ---[ end trace b1acec6fb4ff6e75 ]--- [11384.532133] Kernel panic - not syncing: Fatal exception [11384.536541] Kernel Offset: disabled [11384.969491] ---[ end Kernel panic - not syncing: Fatal exception [11384.976875] sched: Unexpected reschedule of offline CPU#1! [11384.983646] ------------[ cut here ]------------ Rdma device driver may not have implemented (*get_link_layer)() so it can not be called directly. Should use appropriate helper function. Reviewed-by: Yuval Shaia Fixes: 523633359224 ("IB/core: Fix the validations of a multicast LID in attach or detach operations") Cc: stable@kernel.org # 4.13 Reviewed-by: Dennis Dalessandro Signed-off-by: Alex Estrin Signed-off-by: Dennis Dalessandro Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/verbs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index ee9e27dc799b..de57d6c11a25 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1646,7 +1646,7 @@ static bool is_valid_mcast_lid(struct ib_qp *qp, u16 lid) */ if (!ib_query_qp(qp, &attr, IB_QP_STATE | IB_QP_PORT, &init_attr)) { if (attr.qp_state >= IB_QPS_INIT) { - if (qp->device->get_link_layer(qp->device, attr.port_num) != + if (rdma_port_get_link_layer(qp->device, attr.port_num) != IB_LINK_LAYER_INFINIBAND) return true; goto lid_check; @@ -1655,7 +1655,7 @@ static bool is_valid_mcast_lid(struct ib_qp *qp, u16 lid) /* Can't get a quick answer, iterate over all ports */ for (port = 0; port < qp->device->phys_port_cnt; port++) - if (qp->device->get_link_layer(qp->device, port) != + if (rdma_port_get_link_layer(qp->device, port) != IB_LINK_LAYER_INFINIBAND) num_eth_ports++; From 3d318605f5e32ff44fb290d9b67573b34213c4c8 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Wed, 13 Sep 2017 09:52:32 -0700 Subject: [PATCH 223/279] iw_cxgb4: put ep reference in pass_accept_req() The listening endpoint should always be dereferenced at the end of pass_accept_req(). Fixes: f86fac79afec ("RDMA/iw_cxgb4: atomic find and reference for listening endpoints") Cc: stable@vger.kernel.org Signed-off-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index ceaa2fa54d32..83322dbc4711 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2594,9 +2594,9 @@ fail: c4iw_put_ep(&child_ep->com); reject: reject_cr(dev, hwtid, skb); +out: if (parent_ep) c4iw_put_ep(&parent_ep->com); -out: return 0; } From 3c8415cc7aff467faba25841fb859660ac14a04e Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 5 Sep 2017 11:52:33 -0700 Subject: [PATCH 224/279] iw_cxgb4: drop listen destroy replies if no ep found If the thread waiting for a CLOSE_LISTSRV_RPL times out and bails, then we need to handle a subsequent CPL if it arrives and the stid has been released. In this case silently drop it. Cc: stable@vger.kernel.org Signed-off-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 83322dbc4711..19297e408820 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2333,9 +2333,14 @@ static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb) unsigned int stid = GET_TID(rpl); struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid); + if (!ep) { + pr_debug("%s stid %d lookup failure!\n", __func__, stid); + goto out; + } pr_debug("%s ep %p\n", __func__, ep); c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status)); c4iw_put_ep(&ep->com); +out: return 0; } From 8b1bbf36b7452c4acb20e91948eaa5e225ea6978 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 5 Sep 2017 11:52:34 -0700 Subject: [PATCH 225/279] iw_cxgb4: remove the stid on listen create failure If a listen create fails, then the server tid (stid) is incorrectly left in the stid idr table, which can cause a touch-after-free if the stid is looked up and the already freed endpoint is touched. So make sure and remove it in the error path. Cc: stable@vger.kernel.org Signed-off-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 19297e408820..daf7a56e5d7e 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -3462,7 +3462,7 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) cm_id->provider_data = ep; goto out; } - + remove_handle(ep->com.dev, &ep->com.dev->stid_idr, ep->stid); cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, ep->com.local_addr.ss_family); fail2: From 05f5c38576475439f3124a3e6d62db68de83f7be Mon Sep 17 00:00:00 2001 From: KT Liao Date: Fri, 22 Sep 2017 10:00:57 -0700 Subject: [PATCH 226/279] Input: elan_i2c - extend Flash-Write delay The original 20ms delay is only marginally enough delay after a block write operation during firmware update. Let's increase the delay to ensure that the controller finishes up storing the page to avoid failures in the firmware updates. Signed-off-by: KT Liao Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elan_i2c_i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/mouse/elan_i2c_i2c.c b/drivers/input/mouse/elan_i2c_i2c.c index 15b1330606c1..e19eb60b3d2f 100644 --- a/drivers/input/mouse/elan_i2c_i2c.c +++ b/drivers/input/mouse/elan_i2c_i2c.c @@ -598,7 +598,7 @@ static int elan_i2c_write_fw_block(struct i2c_client *client, } /* Wait for F/W to update one page ROM data. */ - msleep(20); + msleep(35); error = elan_i2c_read_cmd(client, ETP_I2C_IAP_CTRL_CMD, val); if (error) { From af3c79beff48fb2bd1c79d02688004cf57215acf Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Thu, 7 Sep 2017 13:38:18 +0300 Subject: [PATCH 227/279] IB/ipoib: Suppress the retry related completion errors IPoIB doesn't support transport/rnr retry schemes as per RFC so those errors are expected. No need to flood the log files with them. Tested-by: Michael Nowak Tested-by: Rafael Alejandro Peralez Tested-by: Liwen Huang Tested-by: Hong Liu Reviewed-by: Mukesh Kacker Reported-by: Rajiv Raja Signed-off-by: Santosh Shilimkar Signed-off-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 14b62f7472b4..7774654c2ccb 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -823,12 +823,18 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) wc->status != IB_WC_WR_FLUSH_ERR) { struct ipoib_neigh *neigh; - if (wc->status != IB_WC_RNR_RETRY_EXC_ERR) - ipoib_warn(priv, "failed cm send event (status=%d, wrid=%d vend_err %x)\n", - wc->status, wr_id, wc->vendor_err); + /* IB_WC[_RNR]_RETRY_EXC_ERR error is part of the life cycle, + * so don't make waves. + */ + if (wc->status == IB_WC_RNR_RETRY_EXC_ERR || + wc->status == IB_WC_RETRY_EXC_ERR) + ipoib_dbg(priv, + "%s: failed cm send event (status=%d, wrid=%d vend_err 0x%x)\n", + __func__, wc->status, wr_id, wc->vendor_err); else - ipoib_dbg(priv, "failed cm send event (status=%d, wrid=%d vend_err %x)\n", - wc->status, wr_id, wc->vendor_err); + ipoib_warn(priv, + "%s: failed cm send event (status=%d, wrid=%d vend_err 0x%x)\n", + __func__, wc->status, wr_id, wc->vendor_err); spin_lock_irqsave(&priv->lock, flags); neigh = tx->neigh; From 06564f60859bdf7e73d70ae35d7e285e96ae9c46 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 11 Sep 2017 17:03:13 +0100 Subject: [PATCH 228/279] IB/ocrdma: fix incorrect fall-through on switch statement In the case where mbox_status is OCRDMA_MBX_STATUS_FAILED and add_status is OCRDMA_MBX_STATUS_FAILED err_num is assigned -EAGAIN however the case OCRDMA_MBX_STATUS_FAILED is missing a break and falls through to the default case which then re-assigns err_num to -EFAULT. Fix this so that err_num is assigned to -EAGAIN for the add_status OCRDMA_MBX_STATUS_FAILED case and -EFAULT otherwise. Detected by CoverityScan CID#703125 ("Missing break in switch") Fixes: fe2caefcdf58 ("RDMA/ocrdma: Add driver for Emulex OneConnect IBoE RDMA adapter") Signed-off-by: Colin Ian King Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index dcb5942f9fb5..65b166cc7437 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -252,7 +252,10 @@ static int ocrdma_get_mbx_errno(u32 status) case OCRDMA_MBX_ADDI_STATUS_INSUFFICIENT_RESOURCES: err_num = -EAGAIN; break; + default: + err_num = -EFAULT; } + break; default: err_num = -EFAULT; } From cbafad87e1507044c7d442087d41d5e3d432cc4e Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Mon, 18 Sep 2017 12:28:48 +0100 Subject: [PATCH 229/279] IB/mlx5: fix debugfs cleanup If delay_drop_debugfs_init() fails in any of the operations to create debugfs, it is calling delay_drop_debugfs_cleanup() as part of its cleanup. But delay_drop_debugfs_cleanup() checks for 'dbg' and since we have not yet pointed 'dbg' to the debugfs we need to cleanup, the cleanup fails and we are left with stray debugfs elements and also a memory leak. Fixes: 4a5fd5d2965c ("IB/mlx5: Add necessary delay drop assignment") Signed-off-by: Sudip Mukherjee Acked-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index ab3c562d5ba7..05fb4bdff6a0 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3837,11 +3837,13 @@ static int delay_drop_debugfs_init(struct mlx5_ib_dev *dev) if (!dbg) return -ENOMEM; + dev->delay_drop.dbg = dbg; + dbg->dir_debugfs = debugfs_create_dir("delay_drop", dev->mdev->priv.dbg_root); if (!dbg->dir_debugfs) - return -ENOMEM; + goto out_debugfs; dbg->events_cnt_debugfs = debugfs_create_atomic_t("num_timeout_events", 0400, @@ -3865,8 +3867,6 @@ static int delay_drop_debugfs_init(struct mlx5_ib_dev *dev) if (!dbg->timeout_debugfs) goto out_debugfs; - dev->delay_drop.dbg = dbg; - return 0; out_debugfs: From e13547bc181ae6c279adf0df054717787f24ee89 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 19 Sep 2017 13:22:13 +0300 Subject: [PATCH 230/279] IB/bnxt_re: Fix frame stack compilation warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reduce stack size by dynamically allocating memory instead of declaring large struct on the stack: drivers/infiniband/hw/bnxt_re/ib_verbs.c: In function ‘bnxt_re_query_qp’: drivers/infiniband/hw/bnxt_re/ib_verbs.c:1600:1: warning: the frame size of 1216 bytes is larger than 1024 bytes [-Wframe-larger-than=] } ^ Cc: Selvin Xavier Fixes: 1ac5a4047975 ("RDMA/bnxt_re: Add bnxt_re RoCE driver") Signed-off-by: Leon Romanovsky Acked-by: Selvin Xavier Reviewed-by: Jonathan Toppins Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 67 +++++++++++++----------- 1 file changed, 36 insertions(+), 31 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 01eee15bbd65..03caf48af8e2 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1551,43 +1551,46 @@ int bnxt_re_query_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, { struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp); struct bnxt_re_dev *rdev = qp->rdev; - struct bnxt_qplib_qp qplib_qp; + struct bnxt_qplib_qp *qplib_qp; int rc; - memset(&qplib_qp, 0, sizeof(struct bnxt_qplib_qp)); - qplib_qp.id = qp->qplib_qp.id; - qplib_qp.ah.host_sgid_index = qp->qplib_qp.ah.host_sgid_index; + qplib_qp = kzalloc(sizeof(*qplib_qp), GFP_KERNEL); + if (!qplib_qp) + return -ENOMEM; - rc = bnxt_qplib_query_qp(&rdev->qplib_res, &qplib_qp); + qplib_qp->id = qp->qplib_qp.id; + qplib_qp->ah.host_sgid_index = qp->qplib_qp.ah.host_sgid_index; + + rc = bnxt_qplib_query_qp(&rdev->qplib_res, qplib_qp); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to query HW QP"); - return rc; + goto out; } - qp_attr->qp_state = __to_ib_qp_state(qplib_qp.state); - qp_attr->en_sqd_async_notify = qplib_qp.en_sqd_async_notify ? 1 : 0; - qp_attr->qp_access_flags = __to_ib_access_flags(qplib_qp.access); - qp_attr->pkey_index = qplib_qp.pkey_index; - qp_attr->qkey = qplib_qp.qkey; + qp_attr->qp_state = __to_ib_qp_state(qplib_qp->state); + qp_attr->en_sqd_async_notify = qplib_qp->en_sqd_async_notify ? 1 : 0; + qp_attr->qp_access_flags = __to_ib_access_flags(qplib_qp->access); + qp_attr->pkey_index = qplib_qp->pkey_index; + qp_attr->qkey = qplib_qp->qkey; qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE; - rdma_ah_set_grh(&qp_attr->ah_attr, NULL, qplib_qp.ah.flow_label, - qplib_qp.ah.host_sgid_index, - qplib_qp.ah.hop_limit, - qplib_qp.ah.traffic_class); - rdma_ah_set_dgid_raw(&qp_attr->ah_attr, qplib_qp.ah.dgid.data); - rdma_ah_set_sl(&qp_attr->ah_attr, qplib_qp.ah.sl); - ether_addr_copy(qp_attr->ah_attr.roce.dmac, qplib_qp.ah.dmac); - qp_attr->path_mtu = __to_ib_mtu(qplib_qp.path_mtu); - qp_attr->timeout = qplib_qp.timeout; - qp_attr->retry_cnt = qplib_qp.retry_cnt; - qp_attr->rnr_retry = qplib_qp.rnr_retry; - qp_attr->min_rnr_timer = qplib_qp.min_rnr_timer; - qp_attr->rq_psn = qplib_qp.rq.psn; - qp_attr->max_rd_atomic = qplib_qp.max_rd_atomic; - qp_attr->sq_psn = qplib_qp.sq.psn; - qp_attr->max_dest_rd_atomic = qplib_qp.max_dest_rd_atomic; - qp_init_attr->sq_sig_type = qplib_qp.sig_type ? IB_SIGNAL_ALL_WR : - IB_SIGNAL_REQ_WR; - qp_attr->dest_qp_num = qplib_qp.dest_qpn; + rdma_ah_set_grh(&qp_attr->ah_attr, NULL, qplib_qp->ah.flow_label, + qplib_qp->ah.host_sgid_index, + qplib_qp->ah.hop_limit, + qplib_qp->ah.traffic_class); + rdma_ah_set_dgid_raw(&qp_attr->ah_attr, qplib_qp->ah.dgid.data); + rdma_ah_set_sl(&qp_attr->ah_attr, qplib_qp->ah.sl); + ether_addr_copy(qp_attr->ah_attr.roce.dmac, qplib_qp->ah.dmac); + qp_attr->path_mtu = __to_ib_mtu(qplib_qp->path_mtu); + qp_attr->timeout = qplib_qp->timeout; + qp_attr->retry_cnt = qplib_qp->retry_cnt; + qp_attr->rnr_retry = qplib_qp->rnr_retry; + qp_attr->min_rnr_timer = qplib_qp->min_rnr_timer; + qp_attr->rq_psn = qplib_qp->rq.psn; + qp_attr->max_rd_atomic = qplib_qp->max_rd_atomic; + qp_attr->sq_psn = qplib_qp->sq.psn; + qp_attr->max_dest_rd_atomic = qplib_qp->max_dest_rd_atomic; + qp_init_attr->sq_sig_type = qplib_qp->sig_type ? IB_SIGNAL_ALL_WR : + IB_SIGNAL_REQ_WR; + qp_attr->dest_qp_num = qplib_qp->dest_qpn; qp_attr->cap.max_send_wr = qp->qplib_qp.sq.max_wqe; qp_attr->cap.max_send_sge = qp->qplib_qp.sq.max_sge; @@ -1596,7 +1599,9 @@ int bnxt_re_query_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, qp_attr->cap.max_inline_data = qp->qplib_qp.max_inline_data; qp_init_attr->cap = qp_attr->cap; - return 0; +out: + kfree(qplib_qp); + return rc; } /* Routine for sending QP1 packets for RoCE V1 an V2 From 01df7f5a77b926c2d790cf66d942d2a68d4ad5de Mon Sep 17 00:00:00 2001 From: Adit Ranadive Date: Thu, 21 Sep 2017 15:56:21 -0700 Subject: [PATCH 231/279] RDMA/vmw_pvrdma: Fix reporting correct opcodes for completion Since the IB_WC_BIND_MW opcode has been dropped, set the correct IB WC opcode explicitly. Fixes: 29c8d9eba550 ("IB: Add vmw_pvrdma driver") Reviewed-by: Aditya Sarwade Reviewed-by: Jorgen Hansen Signed-off-by: Adit Ranadive Signed-off-by: Bryan Tan Signed-off-by: Doug Ledford --- drivers/infiniband/hw/vmw_pvrdma/pvrdma.h | 29 +++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h index 663a0c301c43..984aa3484928 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h @@ -416,9 +416,34 @@ static inline enum ib_wc_status pvrdma_wc_status_to_ib( return (enum ib_wc_status)status; } -static inline int pvrdma_wc_opcode_to_ib(int opcode) +static inline int pvrdma_wc_opcode_to_ib(unsigned int opcode) { - return opcode; + switch (opcode) { + case PVRDMA_WC_SEND: + return IB_WC_SEND; + case PVRDMA_WC_RDMA_WRITE: + return IB_WC_RDMA_WRITE; + case PVRDMA_WC_RDMA_READ: + return IB_WC_RDMA_READ; + case PVRDMA_WC_COMP_SWAP: + return IB_WC_COMP_SWAP; + case PVRDMA_WC_FETCH_ADD: + return IB_WC_FETCH_ADD; + case PVRDMA_WC_LOCAL_INV: + return IB_WC_LOCAL_INV; + case PVRDMA_WC_FAST_REG_MR: + return IB_WC_REG_MR; + case PVRDMA_WC_MASKED_COMP_SWAP: + return IB_WC_MASKED_COMP_SWAP; + case PVRDMA_WC_MASKED_FETCH_ADD: + return IB_WC_MASKED_FETCH_ADD; + case PVRDMA_WC_RECV: + return IB_WC_RECV; + case PVRDMA_WC_RECV_RDMA_WITH_IMM: + return IB_WC_RECV_RDMA_WITH_IMM; + default: + return IB_WC_SEND; + } } static inline int pvrdma_wc_flags_to_ib(int flags) From cd9100ca9e78eb03bf5a0ee2ba98ebdc8cc5880e Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Tue, 19 Sep 2017 09:19:09 -0500 Subject: [PATCH 232/279] i40iw: Fail open if there are no available MSI-X vectors Check number of available MSI-X vectors for i40iw. If there are no available vectors, fail the open. Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_main.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index cc742c3132c6..5965b59a2f83 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -1400,6 +1400,11 @@ static enum i40iw_status_code i40iw_save_msix_info(struct i40iw_device *iwdev, u32 i; u32 size; + if (!ldev->msix_count) { + i40iw_pr_err("No MSI-X vectors\n"); + return I40IW_ERR_CONFIG; + } + iwdev->msix_count = ldev->msix_count; size = sizeof(struct i40iw_msix_vector) * iwdev->msix_count; @@ -1550,7 +1555,7 @@ static enum i40iw_status_code i40iw_setup_init_state(struct i40iw_handler *hdl, status = i40iw_save_msix_info(iwdev, ldev); if (status) - goto exit; + return status; iwdev->hw.dev_context = (void *)ldev->pcidev; iwdev->hw.hw_addr = ldev->hw_addr; status = i40iw_allocate_dma_mem(&iwdev->hw, From 47fb3c1610b28dd435b892762280eebf04fe9adf Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Tue, 19 Sep 2017 09:19:10 -0500 Subject: [PATCH 233/279] i40iw: Prevent multiple netdev event notifier registrations Netdev event notifier registration/de-registration is not synchronized with a lock and there is a possibility of a duplicate registration of notifier before the unregister completes. Register netdev event notifiers during module init and de-register them at module exit. This avoids the need to tie the registration to first netdev client interface open and de-registration to last client interface close and the synchronization to achieve it. This also fixes a crash due to duplicate registration. BUG: unable to handle kernel paging request at ffffffffa0d60388 IP: [] notifier_call_chain+0x3d/0x70 PGD 190d067 PUD 190e063 PMD 76c840067 PTE 0 Oops: 0000 [#1] SMP Modules linked in: i40e(OF-) fuse btrfs zlib_deflate raid6_pq xor vfat msdos [..] e1000e vxlan ip_tunnel ptp pps_core i2c_core video [last unloaded: i40iw] CPU: 1 PID: 27101 Comm: modprobe Tainted: GF W O-------------- 3.10.0-229.el7.x86_64 #1 Hardware name: Gigabyte Technology Co., Ltd. To be filled by O.E.M./Q87M-D2H, BIOS F7 01/17/2014 task: ffff88076e8a96c0 ti: ffff8806959c8000 task.ti: ffff8806959c8000 RIP: 0010:[] [] notifier_call_chain+0x3d/0x70 RSP: 0018:ffff8806959cbb38 EFLAGS: 00010282 RAX: ffffffffa0d60380 RBX: 00000000fffffffd RCX: 0000000000000000 0708] RDX: 0000000000000000 RSI: ffff88081227a000 RDI: 0000000000000002 RBP: ffff8806959cbb60 R08: 0000000000000246 R09: 000000000000700c R10: ffff88080e16ea40 R11: 00000000000ae8df R12: ffffffffa0d60380 R13: 0000000000000002 R14: ffff88076e738800 R15: 0000000000000000 FS: 00007f604ef4a740(0000) GS:ffff88083e240000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffa0d60388 CR3: 0000000753cd2000 CR4: 00000000001407e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Stack: ffffffff819e73a0 0000000000000000 0000000000000002 ffff88076e738800 00000000ffffffff ffff8806959cbba0 ffffffff8109d61d 0000000000000000 0000000000000000 ffff88076e738800 0000000000000000 ffff88076e738800 Call Trace: [] __blocking_notifier_call_chain+0x4d/0x70 [] blocking_notifier_call_chain+0x16/0x20 [] __inet_del_ifa+0x154/0x2b0 [] inetdev_event+0x182/0x530 [] notifier_call_chain+0x4c/0x70 [] raw_notifier_call_chain+0x16/0x20 [] call_netdevice_notifiers+0x2d/0x60 [] rollback_registered_many+0x105/0x220 [] rollback_registered+0x40/0x70 [] unregister_netdevice_queue+0x48/0x80 [] unregister_netdev+0x1c/0x30 [] i40e_vsi_release+0x2a9/0x2b0 [i40e] [] i40e_remove+0x128/0x2b0 [i40e] [] pci_device_remove+0x3b/0xb0 [] __device_release_driver+0x7f/0xf0 [] driver_detach+0xb8/0xc0 [] bus_remove_driver+0x9b/0x120 [] driver_unregister+0x2c/0x50 [] pci_unregister_driver+0x2c/0x90 [] i40e_exit_module+0x10/0x23 [i40e] [] SyS_delete_module+0x16b/0x2d0 [] ? do_notify_resume+0x9c/0xb0 [] system_call_fastpath+0x16/0x1b Code: e5 41 57 4d 89 c7 41 56 49 89 d6 41 55 49 89 f5 41 54 53 89 cb 75 14 eb 3d 0f 1f 44 00 00 83 eb 01 74 25 4d 85 e4 74 20 4c 89 e0 <4c> 8b 60 08 4c 89 f2 4c 89 ee 48 89 c7 ff 10 4d 85 ff 74 04 41 RIP [] notifier_call_chain+0x3d/0x70 Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw.h | 1 - drivers/infiniband/hw/i40iw/i40iw_main.c | 32 ++++++++++++----------- drivers/infiniband/hw/i40iw/i40iw_utils.c | 6 ++--- 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h index 9b1566468744..a65e4cbdce2f 100644 --- a/drivers/infiniband/hw/i40iw/i40iw.h +++ b/drivers/infiniband/hw/i40iw/i40iw.h @@ -201,7 +201,6 @@ enum init_completion_state { CEQ_CREATED, ILQ_CREATED, IEQ_CREATED, - INET_NOTIFIER, IP_ADDR_REGISTERED, RDMA_DEV_REGISTERED }; diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index 5965b59a2f83..27590ae21881 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -99,8 +99,6 @@ static struct notifier_block i40iw_net_notifier = { .notifier_call = i40iw_net_event }; -static atomic_t i40iw_notifiers_registered; - /** * i40iw_find_i40e_handler - find a handler given a client info * @ldev: pointer to a client info @@ -1376,11 +1374,20 @@ error: */ static void i40iw_register_notifiers(void) { - if (atomic_inc_return(&i40iw_notifiers_registered) == 1) { - register_inetaddr_notifier(&i40iw_inetaddr_notifier); - register_inet6addr_notifier(&i40iw_inetaddr6_notifier); - register_netevent_notifier(&i40iw_net_notifier); - } + register_inetaddr_notifier(&i40iw_inetaddr_notifier); + register_inet6addr_notifier(&i40iw_inetaddr6_notifier); + register_netevent_notifier(&i40iw_net_notifier); +} + +/** + * i40iw_unregister_notifiers - unregister tcp ip notifiers + */ + +static void i40iw_unregister_notifiers(void) +{ + unregister_netevent_notifier(&i40iw_net_notifier); + unregister_inetaddr_notifier(&i40iw_inetaddr_notifier); + unregister_inet6addr_notifier(&i40iw_inetaddr6_notifier); } /** @@ -1467,12 +1474,6 @@ static void i40iw_deinit_device(struct i40iw_device *iwdev) if (!iwdev->reset) i40iw_del_macip_entry(iwdev, (u8)iwdev->mac_ip_table_idx); /* fallthrough */ - case INET_NOTIFIER: - if (!atomic_dec_return(&i40iw_notifiers_registered)) { - unregister_netevent_notifier(&i40iw_net_notifier); - unregister_inetaddr_notifier(&i40iw_inetaddr_notifier); - unregister_inet6addr_notifier(&i40iw_inetaddr6_notifier); - } /* fallthrough */ case PBLE_CHUNK_MEM: i40iw_destroy_pble_pool(dev, iwdev->pble_rsrc); @@ -1672,8 +1673,6 @@ static int i40iw_open(struct i40e_info *ldev, struct i40e_client *client) break; iwdev->init_state = PBLE_CHUNK_MEM; iwdev->virtchnl_wq = alloc_ordered_workqueue("iwvch", WQ_MEM_RECLAIM); - i40iw_register_notifiers(); - iwdev->init_state = INET_NOTIFIER; status = i40iw_add_mac_ip(iwdev); if (status) break; @@ -2023,6 +2022,8 @@ static int __init i40iw_init_module(void) i40iw_client.type = I40E_CLIENT_IWARP; spin_lock_init(&i40iw_handler_lock); ret = i40e_register_client(&i40iw_client); + i40iw_register_notifiers(); + return ret; } @@ -2034,6 +2035,7 @@ static int __init i40iw_init_module(void) */ static void __exit i40iw_exit_module(void) { + i40iw_unregister_notifiers(); i40e_unregister_client(&i40iw_client); } diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c index 62f1f45b8737..e52dbbb4165e 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_utils.c +++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c @@ -160,7 +160,7 @@ int i40iw_inetaddr_event(struct notifier_block *notifier, return NOTIFY_DONE; iwdev = &hdl->device; - if (iwdev->init_state < INET_NOTIFIER) + if (iwdev->init_state < IP_ADDR_REGISTERED || iwdev->closing) return NOTIFY_DONE; netdev = iwdev->ldev->netdev; @@ -217,7 +217,7 @@ int i40iw_inet6addr_event(struct notifier_block *notifier, return NOTIFY_DONE; iwdev = &hdl->device; - if (iwdev->init_state < INET_NOTIFIER) + if (iwdev->init_state < IP_ADDR_REGISTERED || iwdev->closing) return NOTIFY_DONE; netdev = iwdev->ldev->netdev; @@ -266,7 +266,7 @@ int i40iw_net_event(struct notifier_block *notifier, unsigned long event, void * if (!iwhdl) return NOTIFY_DONE; iwdev = &iwhdl->device; - if (iwdev->init_state < INET_NOTIFIER) + if (iwdev->init_state < IP_ADDR_REGISTERED || iwdev->closing) return NOTIFY_DONE; p = (__be32 *)neigh->primary_key; i40iw_copy_ip_ntohl(local_ipaddr, p); From 471b370d52a4a461bf855ff542b544f3e4a5cb3a Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Tue, 19 Sep 2017 09:19:11 -0500 Subject: [PATCH 234/279] i40iw: Call i40iw_cm_disconn on modify QP to disconnect If QP modify to closing/terminate/error fails, connection is not torn down as there is no corresponding asynchronous event that will initiate the teardown. Add explicit call to i40iw_cm_disconn if not waiting in modify QP, otherwise schedule it in CM timer. Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 1aa411034a27..28b3d02d511b 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -1027,7 +1027,19 @@ int i40iw_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, iwqp->hw_tcp_state = I40IW_TCP_STATE_CLOSED; iwqp->last_aeq = I40IW_AE_RESET_SENT; spin_unlock_irqrestore(&iwqp->lock, flags); + i40iw_cm_disconn(iwqp); } + } else { + spin_lock_irqsave(&iwqp->lock, flags); + if (iwqp->cm_id) { + if (atomic_inc_return(&iwqp->close_timer_started) == 1) { + iwqp->cm_id->add_ref(iwqp->cm_id); + i40iw_schedule_cm_timer(iwqp->cm_node, + (struct i40iw_puda_buf *)iwqp, + I40IW_TIMER_TYPE_CLOSE, 1, 0); + } + } + spin_unlock_irqrestore(&iwqp->lock, flags); } } return 0; From dfc612b3407e88913a58db00b3bca93685d4f4f9 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Tue, 19 Sep 2017 09:19:12 -0500 Subject: [PATCH 235/279] i40iw: Add missing VLAN priority Set the VLAN priority which is in the upper 3 bits of the VLAN tag field in the QP context. Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_cm.c | 3 ++- drivers/infiniband/hw/i40iw/i40iw_cm.h | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 14f36ba4e5be..b7215448bb63 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -3255,7 +3255,8 @@ static void i40iw_init_tcp_ctx(struct i40iw_cm_node *cm_node, tcp_info->snd_mss = cpu_to_le32(((u32)cm_node->tcp_cntxt.mss)); if (cm_node->vlan_id < VLAN_TAG_PRESENT) { tcp_info->insert_vlan_tag = true; - tcp_info->vlan_tag = cpu_to_le16(cm_node->vlan_id); + tcp_info->vlan_tag = cpu_to_le16(((u16)cm_node->user_pri << I40IW_VLAN_PRIO_SHIFT) | + cm_node->vlan_id); } if (cm_node->ipv4) { tcp_info->src_port = cpu_to_le16(cm_node->loc_port); diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.h b/drivers/infiniband/hw/i40iw/i40iw_cm.h index 2e52e38ffcf3..8626e7f1fdd3 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.h +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.h @@ -71,6 +71,8 @@ #define I40IW_HW_IRD_SETTING_32 32 #define I40IW_HW_IRD_SETTING_64 64 +#define I40IW_VLAN_PRIO_SHIFT 13 + enum ietf_mpa_flags { IETF_MPA_FLAGS_MARKERS = 0x80, /* receive Markers */ IETF_MPA_FLAGS_CRC = 0x40, /* receive Markers */ From f16dc0aa5ea20a2cf173e82ade5f05bfecaa850a Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Tue, 19 Sep 2017 09:19:13 -0500 Subject: [PATCH 236/279] i40iw: Add support for port reuse on active side connections During OpenMPI scale up testing, we observe rdma_connect failures if ports are reused on multiple connections. This is because the Control Queue-Pair (CQP) command to add the reused port to Accelerated Port Bit VectorTable (APBVT) fails as there already exists an entry. Check for duplicate port before invoking the CQP command to add APBVT entry and delete the entry only if the port is not in use. Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_cm.c | 151 ++++++++++++------------- drivers/infiniband/hw/i40iw/i40iw_cm.h | 3 + 2 files changed, 78 insertions(+), 76 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index b7215448bb63..5230dd3c938c 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -1504,23 +1504,40 @@ static void i40iw_add_hte_node(struct i40iw_cm_core *cm_core, } /** - * listen_port_in_use - determine if port is in use - * @port: Listen port number + * i40iw_port_in_use - determine if port is in use + * @port: port number + * @active_side: flag for listener side vs active side */ -static bool i40iw_listen_port_in_use(struct i40iw_cm_core *cm_core, u16 port) +static bool i40iw_port_in_use(struct i40iw_cm_core *cm_core, u16 port, bool active_side) { struct i40iw_cm_listener *listen_node; + struct i40iw_cm_node *cm_node; unsigned long flags; bool ret = false; - spin_lock_irqsave(&cm_core->listen_list_lock, flags); - list_for_each_entry(listen_node, &cm_core->listen_nodes, list) { - if (listen_node->loc_port == port) { - ret = true; - break; + if (active_side) { + /* search connected node list */ + spin_lock_irqsave(&cm_core->ht_lock, flags); + list_for_each_entry(cm_node, &cm_core->connected_nodes, list) { + if (cm_node->loc_port == port) { + ret = true; + break; + } } + if (!ret) + clear_bit(port, cm_core->active_side_ports); + spin_unlock_irqrestore(&cm_core->ht_lock, flags); + } else { + spin_lock_irqsave(&cm_core->listen_list_lock, flags); + list_for_each_entry(listen_node, &cm_core->listen_nodes, list) { + if (listen_node->loc_port == port) { + ret = true; + break; + } + } + spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); } - spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); + return ret; } @@ -1868,7 +1885,7 @@ static int i40iw_dec_refcnt_listen(struct i40iw_cm_core *cm_core, spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); if (listener->iwdev) { - if (apbvt_del && !i40iw_listen_port_in_use(cm_core, listener->loc_port)) + if (apbvt_del && !i40iw_port_in_use(cm_core, listener->loc_port, false)) i40iw_manage_apbvt(listener->iwdev, listener->loc_port, I40IW_MANAGE_APBVT_DEL); @@ -2247,21 +2264,21 @@ static void i40iw_rem_ref_cm_node(struct i40iw_cm_node *cm_node) if (cm_node->listener) { i40iw_dec_refcnt_listen(cm_core, cm_node->listener, 0, true); } else { - if (!i40iw_listen_port_in_use(cm_core, cm_node->loc_port) && - cm_node->apbvt_set) { + if (!i40iw_port_in_use(cm_core, cm_node->loc_port, true) && cm_node->apbvt_set) { i40iw_manage_apbvt(cm_node->iwdev, cm_node->loc_port, I40IW_MANAGE_APBVT_DEL); - i40iw_get_addr_info(cm_node, &nfo); - if (cm_node->qhash_set) { - i40iw_manage_qhash(cm_node->iwdev, - &nfo, - I40IW_QHASH_TYPE_TCP_ESTABLISHED, - I40IW_QHASH_MANAGE_TYPE_DELETE, - NULL, - false); - cm_node->qhash_set = 0; - } + cm_node->apbvt_set = 0; + } + i40iw_get_addr_info(cm_node, &nfo); + if (cm_node->qhash_set) { + i40iw_manage_qhash(cm_node->iwdev, + &nfo, + I40IW_QHASH_TYPE_TCP_ESTABLISHED, + I40IW_QHASH_MANAGE_TYPE_DELETE, + NULL, + false); + cm_node->qhash_set = 0; } } @@ -3738,10 +3755,8 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct sockaddr_in *raddr; struct sockaddr_in6 *laddr6; struct sockaddr_in6 *raddr6; - bool qhash_set = false; - int apbvt_set = 0; - int err = 0; - enum i40iw_status_code status; + int ret = 0; + unsigned long flags; ibqp = i40iw_get_qp(cm_id->device, conn_param->qpn); if (!ibqp) @@ -3790,32 +3805,6 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_info.user_pri = rt_tos2priority(cm_id->tos); i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_DCB, "%s TOS:[%d] UP:[%d]\n", __func__, cm_id->tos, cm_info.user_pri); - if ((cm_info.ipv4 && (laddr->sin_addr.s_addr != raddr->sin_addr.s_addr)) || - (!cm_info.ipv4 && memcmp(laddr6->sin6_addr.in6_u.u6_addr32, - raddr6->sin6_addr.in6_u.u6_addr32, - sizeof(laddr6->sin6_addr.in6_u.u6_addr32)))) { - status = i40iw_manage_qhash(iwdev, - &cm_info, - I40IW_QHASH_TYPE_TCP_ESTABLISHED, - I40IW_QHASH_MANAGE_TYPE_ADD, - NULL, - true); - if (status) - return -EINVAL; - qhash_set = true; - } - status = i40iw_manage_apbvt(iwdev, cm_info.loc_port, I40IW_MANAGE_APBVT_ADD); - if (status) { - i40iw_manage_qhash(iwdev, - &cm_info, - I40IW_QHASH_TYPE_TCP_ESTABLISHED, - I40IW_QHASH_MANAGE_TYPE_DELETE, - NULL, - false); - return -EINVAL; - } - - apbvt_set = 1; cm_id->add_ref(cm_id); cm_node = i40iw_create_cm_node(&iwdev->cm_core, iwdev, conn_param->private_data_len, @@ -3823,17 +3812,40 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) &cm_info); if (IS_ERR(cm_node)) { - err = PTR_ERR(cm_node); - goto err_out; + ret = PTR_ERR(cm_node); + cm_id->rem_ref(cm_id); + return ret; } + if ((cm_info.ipv4 && (laddr->sin_addr.s_addr != raddr->sin_addr.s_addr)) || + (!cm_info.ipv4 && memcmp(laddr6->sin6_addr.in6_u.u6_addr32, + raddr6->sin6_addr.in6_u.u6_addr32, + sizeof(laddr6->sin6_addr.in6_u.u6_addr32)))) { + if (i40iw_manage_qhash(iwdev, &cm_info, I40IW_QHASH_TYPE_TCP_ESTABLISHED, + I40IW_QHASH_MANAGE_TYPE_ADD, NULL, true)) { + ret = -EINVAL; + goto err; + } + cm_node->qhash_set = true; + } + + spin_lock_irqsave(&iwdev->cm_core.ht_lock, flags); + if (!test_and_set_bit(cm_info.loc_port, iwdev->cm_core.active_side_ports)) { + spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags); + if (i40iw_manage_apbvt(iwdev, cm_info.loc_port, I40IW_MANAGE_APBVT_ADD)) { + ret = -EINVAL; + goto err; + } + } else { + spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags); + } + + cm_node->apbvt_set = true; i40iw_record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord); if (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO && !cm_node->ord_size) cm_node->ord_size = 1; - cm_node->apbvt_set = apbvt_set; - cm_node->qhash_set = qhash_set; iwqp->cm_node = cm_node; cm_node->iwqp = iwqp; iwqp->cm_id = cm_id; @@ -3841,11 +3853,9 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (cm_node->state != I40IW_CM_STATE_OFFLOADED) { cm_node->state = I40IW_CM_STATE_SYN_SENT; - err = i40iw_send_syn(cm_node, 0); - if (err) { - i40iw_rem_ref_cm_node(cm_node); - goto err_out; - } + ret = i40iw_send_syn(cm_node, 0); + if (ret) + goto err; } i40iw_debug(cm_node->dev, @@ -3854,9 +3864,10 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_node->rem_port, cm_node, cm_node->cm_id); + return 0; -err_out: +err: if (cm_info.ipv4) i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM, @@ -3868,22 +3879,10 @@ err_out: "Api - connect() FAILED: dest addr=%pI6", cm_info.rem_addr); - if (qhash_set) - i40iw_manage_qhash(iwdev, - &cm_info, - I40IW_QHASH_TYPE_TCP_ESTABLISHED, - I40IW_QHASH_MANAGE_TYPE_DELETE, - NULL, - false); - - if (apbvt_set && !i40iw_listen_port_in_use(&iwdev->cm_core, - cm_info.loc_port)) - i40iw_manage_apbvt(iwdev, - cm_info.loc_port, - I40IW_MANAGE_APBVT_DEL); + i40iw_rem_ref_cm_node(cm_node); cm_id->rem_ref(cm_id); iwdev->cm_core.stats_connect_errs++; - return err; + return ret; } /** diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.h b/drivers/infiniband/hw/i40iw/i40iw_cm.h index 8626e7f1fdd3..45abef76295b 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.h +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.h @@ -71,6 +71,7 @@ #define I40IW_HW_IRD_SETTING_32 32 #define I40IW_HW_IRD_SETTING_64 64 +#define MAX_PORTS 65536 #define I40IW_VLAN_PRIO_SHIFT 13 enum ietf_mpa_flags { @@ -413,6 +414,8 @@ struct i40iw_cm_core { spinlock_t ht_lock; /* manage hash table */ spinlock_t listen_list_lock; /* listen list */ + unsigned long active_side_ports[BITS_TO_LONGS(MAX_PORTS)]; + u64 stats_nodes_created; u64 stats_nodes_destroyed; u64 stats_listen_created; From 432654df90f2a7d8ac8438905208074604ed3e00 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 11 Sep 2017 21:41:43 +0200 Subject: [PATCH 237/279] parisc: Fix too large frame size warnings The parisc architecture has larger stack frames than most other architectures on 32-bit kernels. Increase the maximum allowed stack frame to 1280 bytes for parisc to avoid warnings in the do_sys_poll() and pat_memconfig() functions. Signed-off-by: Helge Deller --- lib/Kconfig.debug | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index b19c491cbc4e..2689b7c50c52 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -219,7 +219,8 @@ config FRAME_WARN range 0 8192 default 0 if KASAN default 2048 if GCC_PLUGIN_LATENT_ENTROPY - default 1024 if !64BIT + default 1280 if (!64BIT && PARISC) + default 1024 if (!64BIT && !PARISC) default 2048 if 64BIT help Tell gcc to warn at build time for stack frames larger than this. From e77900abfd8be4e207412d8b7752dbb9838e2571 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 17 Sep 2017 21:05:02 +0200 Subject: [PATCH 238/279] parisc: Stop unwinding at start of stack Check stack pointer if we are reaching the stack end and stop unwinding if we do. This fixes early backtraces and avoids showing unrealistic call stacks. Signed-off-by: Helge Deller --- arch/parisc/kernel/unwind.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c index 48dc7d4d20bb..caab39dfa95d 100644 --- a/arch/parisc/kernel/unwind.c +++ b/arch/parisc/kernel/unwind.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -279,6 +280,17 @@ static void unwind_frame_regs(struct unwind_frame_info *info) info->prev_sp = sp - 64; info->prev_ip = 0; + + /* The stack is at the end inside the thread_union + * struct. If we reach data, we have reached the + * beginning of the stack and should stop unwinding. */ + if (info->prev_sp >= (unsigned long) task_thread_info(info->t) && + info->prev_sp < ((unsigned long) task_thread_info(info->t) + + THREAD_SZ_ALGN)) { + info->prev_sp = 0; + break; + } + if (get_user(tmp, (unsigned long *)(info->prev_sp - RP_OFFSET))) break; info->prev_ip = tmp; From 08b8a99b2c5ea8da4d3dd55056881d12baea1e04 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 17 Sep 2017 21:17:10 +0200 Subject: [PATCH 239/279] parisc: Move start_parisc() into init section Signed-off-by: Helge Deller --- arch/parisc/kernel/setup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c index dee6f9d6a153..a31e91c1782b 100644 --- a/arch/parisc/kernel/setup.c +++ b/arch/parisc/kernel/setup.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -398,9 +399,8 @@ static int __init parisc_init(void) } arch_initcall(parisc_init); -void start_parisc(void) +void __init start_parisc(void) { - extern void start_kernel(void); extern void early_trap_init(void); int ret, cpunum; From 77089c5274fe2f72db5a2cd956d0d308aed08e68 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 17 Sep 2017 21:15:09 +0200 Subject: [PATCH 240/279] parisc: Add wrapper for pdc_instr() firmware function Signed-off-by: Helge Deller --- arch/parisc/include/asm/pdc.h | 1 + arch/parisc/kernel/firmware.c | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/arch/parisc/include/asm/pdc.h b/arch/parisc/include/asm/pdc.h index 26b4455baa83..510341f62d97 100644 --- a/arch/parisc/include/asm/pdc.h +++ b/arch/parisc/include/asm/pdc.h @@ -280,6 +280,7 @@ void setup_pdc(void); /* in inventory.c */ /* wrapper-functions from pdc.c */ int pdc_add_valid(unsigned long address); +int pdc_instr(unsigned int *instr); int pdc_chassis_info(struct pdc_chassis_info *chassis_info, void *led_info, unsigned long len); int pdc_chassis_disp(unsigned long disp); int pdc_chassis_warn(unsigned long *warn); diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c index ab80e5c6f651..6d471c00c71a 100644 --- a/arch/parisc/kernel/firmware.c +++ b/arch/parisc/kernel/firmware.c @@ -232,6 +232,26 @@ int pdc_add_valid(unsigned long address) } EXPORT_SYMBOL(pdc_add_valid); +/** + * pdc_instr - Get instruction that invokes PDCE_CHECK in HPMC handler. + * @instr: Pointer to variable which will get instruction opcode. + * + * The return value is PDC_OK (0) in case call succeeded. + */ +int __init pdc_instr(unsigned int *instr) +{ + int retval; + unsigned long flags; + + spin_lock_irqsave(&pdc_lock, flags); + retval = mem_pdc_call(PDC_INSTR, 0UL, __pa(pdc_result)); + convert_to_wide(pdc_result); + *instr = pdc_result[0]; + spin_unlock_irqrestore(&pdc_lock, flags); + + return retval; +} + /** * pdc_chassis_info - Return chassis information. * @result: The return buffer. From 8d771b143fe2e3941fc8a32926d21410004578c0 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 17 Sep 2017 21:28:11 +0200 Subject: [PATCH 241/279] parisc: Add PDCE_CHECK instruction to HPMC handler According to the programming note at page 1-31 of the PA 1.1 Firmware Architecture document, one should use the PDC_INSTR firmware function to get the instruction that invokes a PDCE_CHECK in the HPMC handler. This patch follows this note and sets the instruction which has been a nop up until now. Testing on a C3000 and C8000 showed that this firmware call isn't implemented on those machines, so maybe it's only needed on older ones. Signed-off-by: Helge Deller --- arch/parisc/kernel/traps.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 991654c88eec..230333157fe3 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -817,7 +817,7 @@ void __init initialize_ivt(const void *iva) u32 check = 0; u32 *ivap; u32 *hpmcp; - u32 length; + u32 length, instr; if (strcmp((const char *)iva, "cows can fly")) panic("IVT invalid"); @@ -827,6 +827,14 @@ void __init initialize_ivt(const void *iva) for (i = 0; i < 8; i++) *ivap++ = 0; + /* + * Use PDC_INSTR firmware function to get instruction that invokes + * PDCE_CHECK in HPMC handler. See programming note at page 1-31 of + * the PA 1.1 Firmware Architecture document. + */ + if (pdc_instr(&instr) == PDC_OK) + ivap[0] = instr; + /* Compute Checksum for HPMC handler */ length = os_hpmc_size; ivap[7] = length; From ea6976483fb0ced259fbaa9e4f68a2cdcee7e312 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 18 Sep 2017 17:55:24 +0200 Subject: [PATCH 242/279] parisc: Check if initrd was loaded into broken RAM While scanning the PDT for reported broken memory modules, warn if the initrd was coincidentally loaded into bad memory. Signed-off-by: Helge Deller --- arch/parisc/kernel/pdt.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/parisc/kernel/pdt.c b/arch/parisc/kernel/pdt.c index 05730a83895c..00aed082969b 100644 --- a/arch/parisc/kernel/pdt.c +++ b/arch/parisc/kernel/pdt.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -216,8 +217,16 @@ void __init pdc_pdt_init(void) } for (i = 0; i < pdt_status.pdt_entries; i++) { + unsigned long addr; + report_mem_err(pdt_entry[i]); + addr = pdt_entry[i] & PDT_ADDR_PHYS_MASK; + if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && + addr >= initrd_start && addr < initrd_end) + pr_crit("CRITICAL: initrd possibly broken " + "due to bad memory!\n"); + /* mark memory page bad */ memblock_reserve(pdt_entry[i] & PAGE_MASK, PAGE_SIZE); } From a7e6601f70a53957b1d01c321319f0237bba5202 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 21 Sep 2017 21:22:27 +0200 Subject: [PATCH 243/279] parisc: Move init_per_cpu() into init section Signed-off-by: Helge Deller --- arch/parisc/include/asm/smp.h | 1 + arch/parisc/kernel/processor.c | 2 +- arch/parisc/kernel/setup.c | 2 +- arch/parisc/kernel/smp.c | 3 +-- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/parisc/include/asm/smp.h b/arch/parisc/include/asm/smp.h index a5dc9066c6d8..ad9c9c3b4136 100644 --- a/arch/parisc/include/asm/smp.h +++ b/arch/parisc/include/asm/smp.h @@ -1,6 +1,7 @@ #ifndef __ASM_SMP_H #define __ASM_SMP_H +extern int init_per_cpu(int cpuid); #if defined(CONFIG_SMP) diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c index a778bd3c107c..e120d63c1b28 100644 --- a/arch/parisc/kernel/processor.c +++ b/arch/parisc/kernel/processor.c @@ -317,7 +317,7 @@ void __init collect_boot_cpu_data(void) * * o Enable CPU profiling hooks. */ -int init_per_cpu(int cpunum) +int __init init_per_cpu(int cpunum) { int ret; struct pdc_coproc_cfg coproc_cfg; diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c index a31e91c1782b..f7d0c3b33d70 100644 --- a/arch/parisc/kernel/setup.c +++ b/arch/parisc/kernel/setup.c @@ -49,6 +49,7 @@ #include #include #include +#include static char __initdata command_line[COMMAND_LINE_SIZE]; @@ -116,7 +117,6 @@ void __init dma_ops_init(void) } #endif -extern int init_per_cpu(int cpuid); extern void collect_boot_cpu_data(void); void __init setup_arch(char **cmdline_p) diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index 63365106ea19..30c28ab14540 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -255,12 +255,11 @@ void arch_send_call_function_single_ipi(int cpu) static void __init smp_cpu_init(int cpunum) { - extern int init_per_cpu(int); /* arch/parisc/kernel/processor.c */ extern void init_IRQ(void); /* arch/parisc/kernel/irq.c */ extern void start_cpu_itimer(void); /* arch/parisc/kernel/time.c */ /* Set modes and Enable floating point coprocessor */ - (void) init_per_cpu(cpunum); + init_per_cpu(cpunum); disable_sr_hashing(); From 606f95e4255845155f62504a9e1f12665b1853c8 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 21 Sep 2017 21:52:08 +0200 Subject: [PATCH 244/279] parisc: Add HWPOISON page fault handler code Commit 24587380f61d ("parisc: Add MADV_HWPOISON and MADV_SOFT_OFFLINE") added the necessary constants to handle hardware-poisoning. Those were needed to support the page deallocation feature from firmware. But I completely missed to add the relevant fault handler code. This now showed up when I ran the madvise07 testcase from the Linux Test Project, which failed with a kernel BUG at arch/parisc/mm/fault.c:320. With this patch the parisc kernel now behaves like other platforms and gives the same kernel syslog warnings when poisoning pages. Signed-off-by: Helge Deller --- arch/parisc/mm/fault.c | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index 5b101f6a5607..e247edbca68e 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -261,7 +262,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long code, struct task_struct *tsk; struct mm_struct *mm; unsigned long acc_type; - int fault; + int fault = 0; unsigned int flags; if (faulthandler_disabled()) @@ -315,7 +316,8 @@ good_area: goto out_of_memory; else if (fault & VM_FAULT_SIGSEGV) goto bad_area; - else if (fault & VM_FAULT_SIGBUS) + else if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON| + VM_FAULT_HWPOISON_LARGE)) goto bad_area; BUG(); } @@ -352,8 +354,7 @@ bad_area: if (user_mode(regs)) { struct siginfo si; - - show_signal_msg(regs, code, address, tsk, vma); + unsigned int lsb = 0; switch (code) { case 15: /* Data TLB miss fault/Data page fault */ @@ -386,6 +387,30 @@ bad_area: si.si_code = (code == 26) ? SEGV_ACCERR : SEGV_MAPERR; break; } + +#ifdef CONFIG_MEMORY_FAILURE + if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) { + printk(KERN_ERR + "MCE: Killing %s:%d due to hardware memory corruption fault at %08lx\n", + tsk->comm, tsk->pid, address); + si.si_signo = SIGBUS; + si.si_code = BUS_MCEERR_AR; + } +#endif + + /* + * Either small page or large page may be poisoned. + * In other words, VM_FAULT_HWPOISON_LARGE and + * VM_FAULT_HWPOISON are mutually exclusive. + */ + if (fault & VM_FAULT_HWPOISON_LARGE) + lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault)); + else if (fault & VM_FAULT_HWPOISON) + lsb = PAGE_SHIFT; + else + show_signal_msg(regs, code, address, tsk, vma); + si.si_addr_lsb = lsb; + si.si_errno = 0; si.si_addr = (void __user *) address; force_sig_info(si.si_signo, &si, current); From f9b941baa4b78dafcbc4790c0ed2fbbe2aafed06 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Thu, 31 Aug 2017 09:27:28 +0530 Subject: [PATCH 245/279] bnxt_re: Fix update of qplib_qp.mtu when modified The MTU value in the qplib_qp.mtu should be consistent with whatever mtu was set during INIT to RTR.The Next PSN and number of packets are calculated based on this member in the qplib_qp structure. Signed-off-by: Narender Reddy Signed-off-by: Devesh Sharma Signed-off-by: Somnath Kotur Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 03caf48af8e2..11bbf3e748b0 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1436,11 +1436,14 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU; qp->qplib_qp.path_mtu = __from_ib_mtu(qp_attr->path_mtu); + qp->qplib_qp.mtu = ib_mtu_enum_to_int(qp_attr->path_mtu); } else if (qp_attr->qp_state == IB_QPS_RTR) { qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU; qp->qplib_qp.path_mtu = __from_ib_mtu(iboe_get_mtu(rdev->netdev->mtu)); + qp->qplib_qp.mtu = + ib_mtu_enum_to_int(iboe_get_mtu(rdev->netdev->mtu)); } if (qp_attr_mask & IB_QP_TIMEOUT) { From 2b6376305dcb2cb79d0599cf3b9ce98aa860cb4f Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Thu, 31 Aug 2017 09:27:29 +0530 Subject: [PATCH 246/279] bnxt_re: Stop issuing further cmds to FW once a cmd times out Once a cmd to FW times out(after 20s) it is reasonable to assume the FW or atleast the control path is dead. No point issuing further cmds to the FW as each subsequent cmd with another 20s timeout will cascade resulting in unnecessary traces and/or NMI Lockups. Signed-off-by: Somnath Kotur Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 4 ++++ drivers/infiniband/hw/bnxt_re/qplib_rcfw.h | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 391bb7006e8f..2bdb1562bd21 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -107,6 +107,9 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req, return -EINVAL; } + if (test_bit(FIRMWARE_TIMED_OUT, &rcfw->flags)) + return -ETIMEDOUT; + /* Cmdq are in 16-byte units, each request can consume 1 or more * cmdqe */ @@ -226,6 +229,7 @@ int bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw, /* timed out */ dev_err(&rcfw->pdev->dev, "QPLIB: cmdq[%#x]=%#x timedout (%d)msec", cookie, opcode, RCFW_CMD_WAIT_TIME_MS); + set_bit(FIRMWARE_TIMED_OUT, &rcfw->flags); return rc; } diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h index 0ed312f17c8d..85b16da287f9 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h @@ -162,8 +162,9 @@ struct bnxt_qplib_rcfw { unsigned long *cmdq_bitmap; u32 bmap_size; unsigned long flags; -#define FIRMWARE_INITIALIZED_FLAG 1 +#define FIRMWARE_INITIALIZED_FLAG BIT(0) #define FIRMWARE_FIRST_FLAG BIT(31) +#define FIRMWARE_TIMED_OUT BIT(3) wait_queue_head_t waitq; int (*aeq_handler)(struct bnxt_qplib_rcfw *, struct creq_func_event *); From 55311d055175cada8249d39436371afe790df699 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Thu, 31 Aug 2017 09:27:30 +0530 Subject: [PATCH 247/279] bnxt_re: Fix compare and swap atomic operands Driver must assign the user supplied compare/swap values in the wqe to successfully complete the atomic compare and swap operation. Signed-off-by: Devesh Sharma Signed-off-by: Somnath Kotur Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 11bbf3e748b0..0dbdbe1616ab 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1916,6 +1916,7 @@ static int bnxt_re_build_atomic_wqe(struct ib_send_wr *wr, switch (wr->opcode) { case IB_WR_ATOMIC_CMP_AND_SWP: wqe->type = BNXT_QPLIB_SWQE_TYPE_ATOMIC_CMP_AND_SWP; + wqe->atomic.cmp_data = atomic_wr(wr)->compare_add; wqe->atomic.swap_data = atomic_wr(wr)->swap; break; case IB_WR_ATOMIC_FETCH_AND_ADD: From 027c892924eac22f5ca8db4100bccde423be797d Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Thu, 31 Aug 2017 09:27:31 +0530 Subject: [PATCH 248/279] bnxt_re: Free up devices in module_exit path Clean up all devices added to the bnxt_re_dev_list in the module_exit entry point. Signed-off-by: Somnath Kotur Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/main.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 82d1cbc27aee..00a3b743c156 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -1375,6 +1375,22 @@ err_netdev: static void __exit bnxt_re_mod_exit(void) { + struct bnxt_re_dev *rdev; + LIST_HEAD(to_be_deleted); + + mutex_lock(&bnxt_re_dev_lock); + /* Free all adapter allocated resources */ + if (!list_empty(&bnxt_re_dev_list)) + list_splice_init(&bnxt_re_dev_list, &to_be_deleted); + mutex_unlock(&bnxt_re_dev_lock); + + list_for_each_entry(rdev, &to_be_deleted, list) { + dev_info(rdev_to_dev(rdev), "Unregistering Device"); + bnxt_re_dev_stop(rdev); + bnxt_re_ib_unreg(rdev, true); + bnxt_re_remove_one(rdev); + bnxt_re_dev_unreg(rdev); + } unregister_netdevice_notifier(&bnxt_re_netdev_notifier); if (bnxt_re_wq) destroy_workqueue(bnxt_re_wq); From d5917307bb1caa9cb0a915951c57f4cdbacca443 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Thu, 31 Aug 2017 09:27:32 +0530 Subject: [PATCH 249/279] bnxt_re: Fix race between the netdev register and unregister events Upon receipt of the NETDEV_REGISTER event from the netdev notifier chain, the IB stack registration is spawned off to a workqueue since that also requires an rtnl lock. There could be 2 kinds of races between the NETDEV_REGISTER and the NETDEV_UNREGISTER event handling. a)The NETDEV_UNREGISTER event is received in rapid succession after the NETDEV_REGISTER event even before the work queue got a chance to run. b)The NETDEV_UNREGISTER event is received while the workqueue that handles registration with the IB stack is still in progress. Handle both the races with a bit flag that is set just before the work item is queued and cleared in the workqueue after the event is handled just before the workqueue item is freed. While adding the new flag, it was noted that the flags are all used in *_bit() operations which expect a bit number and not a literal constant with a bit set. So change the numbers to be bit numbers. Signed-off-by: Somnath Kotur Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/bnxt_re.h | 12 +++++++----- drivers/infiniband/hw/bnxt_re/main.c | 8 ++++++++ 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index b3ad37fec578..a25f9d240880 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -93,11 +93,13 @@ struct bnxt_re_dev { struct ib_device ibdev; struct list_head list; unsigned long flags; -#define BNXT_RE_FLAG_NETDEV_REGISTERED 0 -#define BNXT_RE_FLAG_IBDEV_REGISTERED 1 -#define BNXT_RE_FLAG_GOT_MSIX 2 -#define BNXT_RE_FLAG_RCFW_CHANNEL_EN 8 -#define BNXT_RE_FLAG_QOS_WORK_REG 16 +#define BNXT_RE_FLAG_NETDEV_REGISTERED 0 +#define BNXT_RE_FLAG_IBDEV_REGISTERED 1 +#define BNXT_RE_FLAG_GOT_MSIX 2 +#define BNXT_RE_FLAG_HAVE_L2_REF 3 +#define BNXT_RE_FLAG_RCFW_CHANNEL_EN 4 +#define BNXT_RE_FLAG_QOS_WORK_REG 5 +#define BNXT_RE_FLAG_TASK_IN_PROG 6 struct net_device *netdev; unsigned int version, major, minor; struct bnxt_en_dev *en_dev; diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 00a3b743c156..29c3d7e254af 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -1259,6 +1259,8 @@ static void bnxt_re_task(struct work_struct *work) default: break; } + smp_mb__before_atomic(); + clear_bit(BNXT_RE_FLAG_TASK_IN_PROG, &rdev->flags); kfree(re_work); } @@ -1317,6 +1319,11 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier, break; case NETDEV_UNREGISTER: + /* netdev notifier will call NETDEV_UNREGISTER again later since + * we are still holding the reference to the netdev + */ + if (test_bit(BNXT_RE_FLAG_TASK_IN_PROG, &rdev->flags)) + goto exit; bnxt_re_ib_unreg(rdev, false); bnxt_re_remove_one(rdev); bnxt_re_dev_unreg(rdev); @@ -1335,6 +1342,7 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier, re_work->vlan_dev = (real_dev == netdev ? NULL : netdev); INIT_WORK(&re_work->work, bnxt_re_task); + set_bit(BNXT_RE_FLAG_TASK_IN_PROG, &rdev->flags); queue_work(bnxt_re_wq, &re_work->work); } } From 74828b128115033ff25d4140d732a05a36eaeaf0 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Thu, 31 Aug 2017 09:27:33 +0530 Subject: [PATCH 250/279] bnxt_re: Remove RTNL lock dependency in bnxt_re_query_port When there is a NETDEV_UNREGISTER event, bnxt_re driver calls ib_unregister_device() (RTNL lock held). ib_unregister_device attempts to flush a worker queue scheduled by ib_core and that queue might have a pending ib_query_port(). ib_query_port in turn calls bnxt_re_query_port(), which while querying the link speed using ib_get_eth_speed(), tries to acquire the rtnl_lock() which was already held by NETDEV_UNREGISTER. Fixing the issue by removing the link speed query from bnxt_re_query_port() Now the speed is queried post a successful ib_register_device or whenever there is a NETDEV_CHANGE event. Signed-off-by: Somnath Kotur Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/bnxt_re.h | 2 ++ drivers/infiniband/hw/bnxt_re/ib_verbs.c | 11 +++-------- drivers/infiniband/hw/bnxt_re/main.c | 4 ++++ 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index a25f9d240880..ecbac91b2e14 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -110,6 +110,8 @@ struct bnxt_re_dev { struct delayed_work worker; u8 cur_prio_map; + u8 active_speed; + u8 active_width; /* FP Notification Queue (CQ & SRQ) */ struct tasklet_struct nq_task; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 0dbdbe1616ab..7430ef07a0e1 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -259,14 +259,9 @@ int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num, port_attr->sm_sl = 0; port_attr->subnet_timeout = 0; port_attr->init_type_reply = 0; - /* call the underlying netdev's ethtool hooks to query speed settings - * for which we acquire rtnl_lock _only_ if it's registered with - * IB stack to avoid race in the NETDEV_UNREG path - */ - if (test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) - if (ib_get_eth_speed(ibdev, port_num, &port_attr->active_speed, - &port_attr->active_width)) - return -EINVAL; + port_attr->active_speed = rdev->active_speed; + port_attr->active_width = rdev->active_width; + return 0; } diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 29c3d7e254af..e7450ea92aa9 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -1161,6 +1161,8 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) } } set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags); + ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed, + &rdev->active_width); bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_PORT_ACTIVE); bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_GID_CHANGE); @@ -1255,6 +1257,8 @@ static void bnxt_re_task(struct work_struct *work) else if (netif_carrier_ok(rdev->netdev)) bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_PORT_ACTIVE); + ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed, + &rdev->active_width); break; default: break; From 1993519be8bc86342687c61d8d11c3ade62b3b84 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Thu, 31 Aug 2017 09:27:34 +0530 Subject: [PATCH 251/279] bnxt_re: Fix memory leak in FRMR path This patch fixes a memory leak issue when alloc_mr is used. mr->pages and mr->npages are used only in alloc_mr path. mr->pages is allocated when alloc_mr is called or in the case of FRMR, while creating the MR. mr->npages is updated only when the MR created is used i.e. after invoking map_mr_sg verb, before data transfer. In the dereg_mr path, if mr->npages is 0, driver ends up not freeing the memory created. Removing the npages check from the dereg_mr path for kernel consumers. Signed-off-by: Selvin Xavier Signed-off-by: Somnath Kotur Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 7430ef07a0e1..d7be4e4227ce 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -3066,7 +3066,7 @@ int bnxt_re_dereg_mr(struct ib_mr *ib_mr) return rc; } - if (mr->npages && mr->pages) { + if (mr->pages) { rc = bnxt_qplib_free_fast_reg_page_list(&rdev->qplib_res, &mr->qplib_frpl); kfree(mr->pages); From 89aaca54ba60e91f02c1c168fbef5d71f71a6d43 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Thu, 31 Aug 2017 09:27:35 +0530 Subject: [PATCH 252/279] bnxt_re: Don't issue cmd to delete GID for QP1 GID entry before the QP is destroyed FW needs the 0th GID Entry in the Table to be preserved before it's corresponding QP1 is deleted, else it will fail the cmd. Check for the same and return to prevent error msg being logged for cmd failure. Signed-off-by: Somnath Kotur Signed-off-by: Doug Ledford --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index d7be4e4227ce..0d89621d9fe8 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -314,6 +314,7 @@ int bnxt_re_del_gid(struct ib_device *ibdev, u8 port_num, struct bnxt_re_gid_ctx *ctx, **ctx_tbl; struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl; + struct bnxt_qplib_gid *gid_to_del; /* Delete the entry from the hardware */ ctx = *context; @@ -323,11 +324,25 @@ int bnxt_re_del_gid(struct ib_device *ibdev, u8 port_num, if (sgid_tbl && sgid_tbl->active) { if (ctx->idx >= sgid_tbl->max) return -EINVAL; + gid_to_del = &sgid_tbl->tbl[ctx->idx]; + /* DEL_GID is called in WQ context(netdevice_event_work_handler) + * or via the ib_unregister_device path. In the former case QP1 + * may not be destroyed yet, in which case just return as FW + * needs that entry to be present and will fail it's deletion. + * We could get invoked again after QP1 is destroyed OR get an + * ADD_GID call with a different GID value for the same index + * where we issue MODIFY_GID cmd to update the GID entry -- TBD + */ + if (ctx->idx == 0 && + rdma_link_local_addr((struct in6_addr *)gid_to_del) && + ctx->refcnt == 1 && rdev->qp1_sqp) { + dev_dbg(rdev_to_dev(rdev), + "Trying to delete GID0 while QP1 is alive\n"); + return -EFAULT; + } ctx->refcnt--; if (!ctx->refcnt) { - rc = bnxt_qplib_del_sgid(sgid_tbl, - &sgid_tbl->tbl[ctx->idx], - true); + rc = bnxt_qplib_del_sgid(sgid_tbl, gid_to_del, true); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to remove GID: %#x", rc); @@ -811,6 +826,8 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp) kfree(rdev->sqp_ah); kfree(rdev->qp1_sqp); + rdev->qp1_sqp = NULL; + rdev->sqp_ah = NULL; } if (!IS_ERR_OR_NULL(qp->rumem)) From 19fe43a54fb67b6cc8857e65c78e1dc8aa2e97a3 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 6 Jul 2017 10:56:21 +0200 Subject: [PATCH 253/279] apparmor: Fix shadowed local variable in unpack_trans_table() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit with W=2: security/apparmor/policy_unpack.c: In function ‘unpack_trans_table’: security/apparmor/policy_unpack.c:469: warning: declaration of ‘pos’ shadows a previous local security/apparmor/policy_unpack.c:451: warning: shadowed declaration is here Rename the old "pos" to "saved_pos" to fix this. Fixes: 5379a3312024a8be ("apparmor: support v7 transition format compatible with label_parse") Signed-off-by: Geert Uytterhoeven Reviewed-by: Serge Hallyn Signed-off-by: John Johansen --- security/apparmor/policy_unpack.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index c600f4dd1783..2d5a1a007b06 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -448,7 +448,7 @@ fail: */ static bool unpack_trans_table(struct aa_ext *e, struct aa_profile *profile) { - void *pos = e->pos; + void *saved_pos = e->pos; /* exec table is optional */ if (unpack_nameX(e, AA_STRUCT, "xtable")) { @@ -511,7 +511,7 @@ static bool unpack_trans_table(struct aa_ext *e, struct aa_profile *profile) fail: aa_free_domain_entries(&profile->file.trans); - e->pos = pos; + e->pos = saved_pos; return 0; } From 86aea56f14929ff1c05eca1776e9068e907429d5 Mon Sep 17 00:00:00 2001 From: Christos Gkekas Date: Sat, 8 Jul 2017 20:50:21 +0100 Subject: [PATCH 254/279] apparmor: Fix logical error in verify_header() verify_header() is currently checking whether interface version is less than 5 *and* greater than 7, which always evaluates to false. Instead it should check whether it is less than 5 *or* greater than 7. Signed-off-by: Christos Gkekas Signed-off-by: John Johansen --- security/apparmor/policy_unpack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 2d5a1a007b06..bda0dce3b582 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -832,7 +832,7 @@ static int verify_header(struct aa_ext *e, int required, const char **ns) * if not specified use previous version * Mask off everything that is not kernel abi version */ - if (VERSION_LT(e->version, v5) && VERSION_GT(e->version, v7)) { + if (VERSION_LT(e->version, v5) || VERSION_GT(e->version, v7)) { audit_iface(NULL, NULL, NULL, "unsupported interface version", e, error); return error; From 5d314a81eca29b01939930c1c596dfa44937e970 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 13 Jul 2017 10:39:20 +0300 Subject: [PATCH 255/279] apparmor: Fix an error code in aafs_create() We accidentally forgot to set the error code on this path. It means we return NULL instead of an error pointer. I looked through a bunch of callers and I don't think it really causes a big issue, but the documentation says we're supposed to return error pointers here. Signed-off-by: Dan Carpenter Acked-by: Serge Hallyn Signed-off-by: John Johansen --- security/apparmor/apparmorfs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 853c2ec8e0c9..2caeb748070c 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -248,8 +248,10 @@ static struct dentry *aafs_create(const char *name, umode_t mode, inode_lock(dir); dentry = lookup_one_len(name, parent, strlen(name)); - if (IS_ERR(dentry)) + if (IS_ERR(dentry)) { + error = PTR_ERR(dentry); goto fail_lock; + } if (d_really_is_positive(dentry)) { error = -EEXIST; From c5561700c9cb951ec3a33a0914c840423b09d7c9 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Mon, 31 Jul 2017 23:44:37 -0700 Subject: [PATCH 256/279] apparmor: Redundant condition: prev_ns. in [label.c:1498] Reported-by: David Binderman Signed-off-by: John Johansen --- security/apparmor/label.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/apparmor/label.c b/security/apparmor/label.c index e052eaba1cf6..e324f4df3e34 100644 --- a/security/apparmor/label.c +++ b/security/apparmor/label.c @@ -1495,7 +1495,7 @@ static int aa_profile_snxprint(char *str, size_t size, struct aa_ns *view, view = profiles_ns(profile); if (view != profile->ns && - (!prev_ns || (prev_ns && *prev_ns != profile->ns))) { + (!prev_ns || (*prev_ns != profile->ns))) { if (prev_ns) *prev_ns = profile->ns; ns_name = aa_ns_name(view, profile->ns, From cd1dbf76b23d5ab2cba5e657fe20b1e236a408cc Mon Sep 17 00:00:00 2001 From: John Johansen Date: Tue, 18 Jul 2017 22:56:22 -0700 Subject: [PATCH 257/279] apparmor: add the ability to mediate signals Add signal mediation where the signal can be mediated based on the signal, direction, or the label or the peer/target. The signal perms are verified on a cross check to ensure policy consistency in the case of incremental policy load/replacement. The optimization of skipping the cross check when policy is guaranteed to be consistent (single compile unit) remains to be done. policy rules have the form of SIGNAL_RULE = [ QUALIFIERS ] 'signal' [ SIGNAL ACCESS PERMISSIONS ] [ SIGNAL SET ] [ SIGNAL PEER ] SIGNAL ACCESS PERMISSIONS = SIGNAL ACCESS | SIGNAL ACCESS LIST SIGNAL ACCESS LIST = '(' Comma or space separated list of SIGNAL ACCESS ')' SIGNAL ACCESS = ( 'r' | 'w' | 'rw' | 'read' | 'write' | 'send' | 'receive' ) SIGNAL SET = 'set' '=' '(' SIGNAL LIST ')' SIGNAL LIST = Comma or space separated list of SIGNALS SIGNALS = ( 'hup' | 'int' | 'quit' | 'ill' | 'trap' | 'abrt' | 'bus' | 'fpe' | 'kill' | 'usr1' | 'segv' | 'usr2' | 'pipe' | 'alrm' | 'term' | 'stkflt' | 'chld' | 'cont' | 'stop' | 'stp' | 'ttin' | 'ttou' | 'urg' | 'xcpu' | 'xfsz' | 'vtalrm' | 'prof' | 'winch' | 'io' | 'pwr' | 'sys' | 'emt' | 'exists' | 'rtmin+0' ... 'rtmin+32' ) SIGNAL PEER = 'peer' '=' AARE eg. signal, # allow all signals signal send set=(hup, kill) peer=foo, Signed-off-by: John Johansen Acked-by: Seth Arnold --- security/apparmor/apparmorfs.c | 7 ++ security/apparmor/include/apparmor.h | 1 + security/apparmor/include/audit.h | 2 + security/apparmor/include/ipc.h | 6 ++ security/apparmor/include/sig_names.h | 95 +++++++++++++++++++++++++ security/apparmor/ipc.c | 99 +++++++++++++++++++++++++++ security/apparmor/lsm.c | 21 ++++++ 7 files changed, 231 insertions(+) create mode 100644 security/apparmor/include/sig_names.h diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 2caeb748070c..a5f9e1aa51f7 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -32,6 +32,7 @@ #include "include/audit.h" #include "include/context.h" #include "include/crypto.h" +#include "include/ipc.h" #include "include/policy_ns.h" #include "include/label.h" #include "include/policy.h" @@ -2129,6 +2130,11 @@ static struct aa_sfs_entry aa_sfs_entry_ptrace[] = { { } }; +static struct aa_sfs_entry aa_sfs_entry_signal[] = { + AA_SFS_FILE_STRING("mask", AA_SFS_SIG_MASK), + { } +}; + static struct aa_sfs_entry aa_sfs_entry_domain[] = { AA_SFS_FILE_BOOLEAN("change_hat", 1), AA_SFS_FILE_BOOLEAN("change_hatv", 1), @@ -2179,6 +2185,7 @@ static struct aa_sfs_entry aa_sfs_entry_features[] = { AA_SFS_DIR("rlimit", aa_sfs_entry_rlimit), AA_SFS_DIR("caps", aa_sfs_entry_caps), AA_SFS_DIR("ptrace", aa_sfs_entry_ptrace), + AA_SFS_DIR("signal", aa_sfs_entry_signal), AA_SFS_DIR("query", aa_sfs_entry_query), { } }; diff --git a/security/apparmor/include/apparmor.h b/security/apparmor/include/apparmor.h index aaf893f4e4f5..962a20a75e01 100644 --- a/security/apparmor/include/apparmor.h +++ b/security/apparmor/include/apparmor.h @@ -28,6 +28,7 @@ #define AA_CLASS_RLIMITS 5 #define AA_CLASS_DOMAIN 6 #define AA_CLASS_PTRACE 9 +#define AA_CLASS_SIGNAL 10 #define AA_CLASS_LABEL 16 #define AA_CLASS_LAST AA_CLASS_LABEL diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h index c68839a44351..d9a156ae11b9 100644 --- a/security/apparmor/include/audit.h +++ b/security/apparmor/include/audit.h @@ -86,6 +86,7 @@ enum audit_type { #define OP_SHUTDOWN "socket_shutdown" #define OP_PTRACE "ptrace" +#define OP_SIGNAL "signal" #define OP_EXEC "exec" @@ -126,6 +127,7 @@ struct apparmor_audit_data { long pos; const char *ns; } iface; + int signal; struct { int rlim; unsigned long max; diff --git a/security/apparmor/include/ipc.h b/security/apparmor/include/ipc.h index 656fdb81c8a0..5ffc218d1e74 100644 --- a/security/apparmor/include/ipc.h +++ b/security/apparmor/include/ipc.h @@ -27,8 +27,14 @@ struct aa_profile; #define AA_PTRACE_PERM_MASK (AA_PTRACE_READ | AA_PTRACE_TRACE | \ AA_MAY_BE_READ | AA_MAY_BE_TRACED) +#define AA_SIGNAL_PERM_MASK (MAY_READ | MAY_WRITE) + +#define AA_SFS_SIG_MASK "hup int quit ill trap abrt bus fpe kill usr1 " \ + "segv usr2 pipe alrm term stkflt chld cont stop stp ttin ttou urg " \ + "xcpu xfsz vtalrm prof winch io pwr sys emt lost" int aa_may_ptrace(struct aa_label *tracer, struct aa_label *tracee, u32 request); +int aa_may_signal(struct aa_label *sender, struct aa_label *target, int sig); #endif /* __AA_IPC_H */ diff --git a/security/apparmor/include/sig_names.h b/security/apparmor/include/sig_names.h new file mode 100644 index 000000000000..0d4395f231ca --- /dev/null +++ b/security/apparmor/include/sig_names.h @@ -0,0 +1,95 @@ +#include + +#define SIGUNKNOWN 0 +#define MAXMAPPED_SIG 35 +/* provide a mapping of arch signal to internal signal # for mediation + * those that are always an alias SIGCLD for SIGCLHD and SIGPOLL for SIGIO + * map to the same entry those that may/or may not get a separate entry + */ +static const int sig_map[MAXMAPPED_SIG] = { + [0] = MAXMAPPED_SIG, /* existence test */ + [SIGHUP] = 1, + [SIGINT] = 2, + [SIGQUIT] = 3, + [SIGILL] = 4, + [SIGTRAP] = 5, /* -, 5, - */ + [SIGABRT] = 6, /* SIGIOT: -, 6, - */ + [SIGBUS] = 7, /* 10, 7, 10 */ + [SIGFPE] = 8, + [SIGKILL] = 9, + [SIGUSR1] = 10, /* 30, 10, 16 */ + [SIGSEGV] = 11, + [SIGUSR2] = 12, /* 31, 12, 17 */ + [SIGPIPE] = 13, + [SIGALRM] = 14, + [SIGTERM] = 15, + [SIGSTKFLT] = 16, /* -, 16, - */ + [SIGCHLD] = 17, /* 20, 17, 18. SIGCHLD -, -, 18 */ + [SIGCONT] = 18, /* 19, 18, 25 */ + [SIGSTOP] = 19, /* 17, 19, 23 */ + [SIGTSTP] = 20, /* 18, 20, 24 */ + [SIGTTIN] = 21, /* 21, 21, 26 */ + [SIGTTOU] = 22, /* 22, 22, 27 */ + [SIGURG] = 23, /* 16, 23, 21 */ + [SIGXCPU] = 24, /* 24, 24, 30 */ + [SIGXFSZ] = 25, /* 25, 25, 31 */ + [SIGVTALRM] = 26, /* 26, 26, 28 */ + [SIGPROF] = 27, /* 27, 27, 29 */ + [SIGWINCH] = 28, /* 28, 28, 20 */ + [SIGIO] = 29, /* SIGPOLL: 23, 29, 22 */ + [SIGPWR] = 30, /* 29, 30, 19. SIGINFO 29, -, - */ +#ifdef SIGSYS + [SIGSYS] = 31, /* 12, 31, 12. often SIG LOST/UNUSED */ +#endif +#ifdef SIGEMT + [SIGEMT] = 32, /* 7, - , 7 */ +#endif +#if defined(SIGLOST) && SIGPWR != SIGLOST /* sparc */ + [SIGLOST] = 33, /* unused on Linux */ +#endif +#if defined(SIGLOST) && defined(SIGSYS) && SIGLOST != SIGSYS + [SIGUNUSED] = 34, /* -, 31, - */ +#endif +}; + +/* this table is ordered post sig_map[sig] mapping */ +static const char *const sig_names[MAXMAPPED_SIG + 1] = { + "unknown", + "hup", + "int", + "quit", + "ill", + "trap", + "abrt", + "bus", + "fpe", + "kill", + "usr1", + "segv", + "usr2", + "pipe", + "alrm", + "term", + "stkflt", + "chld", + "cont", + "stop", + "stp", + "ttin", + "ttou", + "urg", + "xcpu", + "xfsz", + "vtalrm", + "prof", + "winch", + "io", + "pwr", + "sys", + "emt", + "lost", + "unused", + + "exists", /* always last existence test mapped to MAXMAPPED_SIG */ +}; + diff --git a/security/apparmor/ipc.c b/security/apparmor/ipc.c index 11e66b5bbc42..66fb9ede9447 100644 --- a/security/apparmor/ipc.c +++ b/security/apparmor/ipc.c @@ -20,6 +20,7 @@ #include "include/context.h" #include "include/policy.h" #include "include/ipc.h" +#include "include/sig_names.h" /** * audit_ptrace_mask - convert mask to permission string @@ -121,3 +122,101 @@ int aa_may_ptrace(struct aa_label *tracer, struct aa_label *tracee, } +static inline int map_signal_num(int sig) +{ + if (sig > SIGRTMAX) + return SIGUNKNOWN; + else if (sig >= SIGRTMIN) + return sig - SIGRTMIN + 128; /* rt sigs mapped to 128 */ + else if (sig <= MAXMAPPED_SIG) + return sig_map[sig]; + return SIGUNKNOWN; +} + +/** + * audit_file_mask - convert mask to permission string + * @buffer: buffer to write string to (NOT NULL) + * @mask: permission mask to convert + */ +static void audit_signal_mask(struct audit_buffer *ab, u32 mask) +{ + if (mask & MAY_READ) + audit_log_string(ab, "receive"); + if (mask & MAY_WRITE) + audit_log_string(ab, "send"); +} + +/** + * audit_cb - call back for signal specific audit fields + * @ab: audit_buffer (NOT NULL) + * @va: audit struct to audit values of (NOT NULL) + */ +static void audit_signal_cb(struct audit_buffer *ab, void *va) +{ + struct common_audit_data *sa = va; + + if (aad(sa)->request & AA_SIGNAL_PERM_MASK) { + audit_log_format(ab, " requested_mask="); + audit_signal_mask(ab, aad(sa)->request); + if (aad(sa)->denied & AA_SIGNAL_PERM_MASK) { + audit_log_format(ab, " denied_mask="); + audit_signal_mask(ab, aad(sa)->denied); + } + } + if (aad(sa)->signal <= MAXMAPPED_SIG) + audit_log_format(ab, " signal=%s", sig_names[aad(sa)->signal]); + else + audit_log_format(ab, " signal=rtmin+%d", + aad(sa)->signal - 128); + audit_log_format(ab, " peer="); + aa_label_xaudit(ab, labels_ns(aad(sa)->label), aad(sa)->peer, + FLAGS_NONE, GFP_ATOMIC); +} + +/* TODO: update to handle compound name&name2, conditionals */ +static void profile_match_signal(struct aa_profile *profile, const char *label, + int signal, struct aa_perms *perms) +{ + unsigned int state; + + /* TODO: secondary cache check */ + state = aa_dfa_next(profile->policy.dfa, + profile->policy.start[AA_CLASS_SIGNAL], + signal); + state = aa_dfa_match(profile->policy.dfa, state, label); + aa_compute_perms(profile->policy.dfa, state, perms); +} + +static int profile_signal_perm(struct aa_profile *profile, + struct aa_profile *peer, u32 request, + struct common_audit_data *sa) +{ + struct aa_perms perms; + + if (profile_unconfined(profile) || + !PROFILE_MEDIATES(profile, AA_CLASS_SIGNAL)) + return 0; + + aad(sa)->peer = &peer->label; + profile_match_signal(profile, peer->base.hname, aad(sa)->signal, + &perms); + aa_apply_modes_to_perms(profile, &perms); + return aa_check_perms(profile, &perms, request, sa, audit_signal_cb); +} + +static int aa_signal_cross_perm(struct aa_profile *sender, + struct aa_profile *target, + struct common_audit_data *sa) +{ + return xcheck(profile_signal_perm(sender, target, MAY_WRITE, sa), + profile_signal_perm(target, sender, MAY_READ, sa)); +} + +int aa_may_signal(struct aa_label *sender, struct aa_label *target, int sig) +{ + DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, OP_SIGNAL); + + aad(&sa)->signal = map_signal_num(sig); + return xcheck_labels_profiles(sender, target, aa_signal_cross_perm, + &sa); +} diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 867bcd154c7e..af22f3dfbcce 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -656,6 +656,26 @@ static int apparmor_task_setrlimit(struct task_struct *task, return error; } +static int apparmor_task_kill(struct task_struct *target, struct siginfo *info, + int sig, u32 secid) +{ + struct aa_label *cl, *tl; + int error; + + if (secid) + /* TODO: after secid to label mapping is done. + * Dealing with USB IO specific behavior + */ + return 0; + cl = __begin_current_label_crit_section(); + tl = aa_get_task_label(target); + error = aa_may_signal(cl, tl, sig); + aa_put_label(tl); + __end_current_label_crit_section(cl); + + return error; +} + static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(ptrace_access_check, apparmor_ptrace_access_check), LSM_HOOK_INIT(ptrace_traceme, apparmor_ptrace_traceme), @@ -697,6 +717,7 @@ static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(bprm_secureexec, apparmor_bprm_secureexec), LSM_HOOK_INIT(task_setrlimit, apparmor_task_setrlimit), + LSM_HOOK_INIT(task_kill, apparmor_task_kill), }; /* From 2ea3ffb7782a84da33a8382f13ebd016da50079b Mon Sep 17 00:00:00 2001 From: John Johansen Date: Tue, 18 Jul 2017 23:04:47 -0700 Subject: [PATCH 258/279] apparmor: add mount mediation Add basic mount mediation. That allows controlling based on basic mount parameters. It does not include special mount parameters for apparmor, super block labeling, or any triggers for apparmor namespace parameter modifications on pivot root. default userspace policy rules have the form of MOUNT RULE = ( MOUNT | REMOUNT | UMOUNT ) MOUNT = [ QUALIFIERS ] 'mount' [ MOUNT CONDITIONS ] [ SOURCE FILEGLOB ] [ '->' MOUNTPOINT FILEGLOB ] REMOUNT = [ QUALIFIERS ] 'remount' [ MOUNT CONDITIONS ] MOUNTPOINT FILEGLOB UMOUNT = [ QUALIFIERS ] 'umount' [ MOUNT CONDITIONS ] MOUNTPOINT FILEGLOB MOUNT CONDITIONS = [ ( 'fstype' | 'vfstype' ) ( '=' | 'in' ) MOUNT FSTYPE EXPRESSION ] [ 'options' ( '=' | 'in' ) MOUNT FLAGS EXPRESSION ] MOUNT FSTYPE EXPRESSION = ( MOUNT FSTYPE LIST | MOUNT EXPRESSION ) MOUNT FSTYPE LIST = Comma separated list of valid filesystem and virtual filesystem types (eg ext4, debugfs, etc) MOUNT FLAGS EXPRESSION = ( MOUNT FLAGS LIST | MOUNT EXPRESSION ) MOUNT FLAGS LIST = Comma separated list of MOUNT FLAGS. MOUNT FLAGS = ( 'ro' | 'rw' | 'nosuid' | 'suid' | 'nodev' | 'dev' | 'noexec' | 'exec' | 'sync' | 'async' | 'remount' | 'mand' | 'nomand' | 'dirsync' | 'noatime' | 'atime' | 'nodiratime' | 'diratime' | 'bind' | 'rbind' | 'move' | 'verbose' | 'silent' | 'loud' | 'acl' | 'noacl' | 'unbindable' | 'runbindable' | 'private' | 'rprivate' | 'slave' | 'rslave' | 'shared' | 'rshared' | 'relatime' | 'norelatime' | 'iversion' | 'noiversion' | 'strictatime' | 'nouser' | 'user' ) MOUNT EXPRESSION = ( ALPHANUMERIC | AARE ) ... PIVOT ROOT RULE = [ QUALIFIERS ] pivot_root [ oldroot=OLD PUT FILEGLOB ] [ NEW ROOT FILEGLOB ] SOURCE FILEGLOB = FILEGLOB MOUNTPOINT FILEGLOB = FILEGLOB eg. mount, mount /dev/foo, mount options=ro /dev/foo -> /mnt/, mount options in (ro,atime) /dev/foo -> /mnt/, mount options=ro options=atime, Signed-off-by: John Johansen Acked-by: Seth Arnold --- security/apparmor/Makefile | 2 +- security/apparmor/apparmorfs.c | 8 +- security/apparmor/domain.c | 4 +- security/apparmor/include/apparmor.h | 1 + security/apparmor/include/audit.h | 11 + security/apparmor/include/domain.h | 5 + security/apparmor/include/mount.h | 54 +++ security/apparmor/lsm.c | 64 +++ security/apparmor/mount.c | 696 +++++++++++++++++++++++++++ 9 files changed, 841 insertions(+), 4 deletions(-) create mode 100644 security/apparmor/include/mount.h create mode 100644 security/apparmor/mount.c diff --git a/security/apparmor/Makefile b/security/apparmor/Makefile index a16b195274de..81a34426d024 100644 --- a/security/apparmor/Makefile +++ b/security/apparmor/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_SECURITY_APPARMOR) += apparmor.o apparmor-y := apparmorfs.o audit.o capability.o context.o ipc.o lib.o match.o \ path.o domain.o policy.o policy_unpack.o procattr.o lsm.o \ - resource.o secid.o file.o policy_ns.o label.o + resource.o secid.o file.o policy_ns.o label.o mount.o apparmor-$(CONFIG_SECURITY_APPARMOR_HASH) += crypto.o clean-files := capability_names.h rlim_names.h diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index a5f9e1aa51f7..8fa6c898c44b 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -2159,9 +2159,14 @@ static struct aa_sfs_entry aa_sfs_entry_policy[] = { { } }; +static struct aa_sfs_entry aa_sfs_entry_mount[] = { + AA_SFS_FILE_STRING("mask", "mount umount pivot_root"), + { } +}; + static struct aa_sfs_entry aa_sfs_entry_ns[] = { AA_SFS_FILE_BOOLEAN("profile", 1), - AA_SFS_FILE_BOOLEAN("pivot_root", 1), + AA_SFS_FILE_BOOLEAN("pivot_root", 0), { } }; @@ -2180,6 +2185,7 @@ static struct aa_sfs_entry aa_sfs_entry_features[] = { AA_SFS_DIR("policy", aa_sfs_entry_policy), AA_SFS_DIR("domain", aa_sfs_entry_domain), AA_SFS_DIR("file", aa_sfs_entry_file), + AA_SFS_DIR("mount", aa_sfs_entry_mount), AA_SFS_DIR("namespaces", aa_sfs_entry_ns), AA_SFS_FILE_U64("capability", VFS_CAP_FLAGS_MASK), AA_SFS_DIR("rlimit", aa_sfs_entry_rlimit), diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index d0594446ae3f..ffc8c75a6785 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -374,8 +374,8 @@ static const char *next_name(int xtype, const char *name) * * Returns: refcounted label, or NULL on failure (MAYBE NULL) */ -static struct aa_label *x_table_lookup(struct aa_profile *profile, u32 xindex, - const char **name) +struct aa_label *x_table_lookup(struct aa_profile *profile, u32 xindex, + const char **name) { struct aa_label *label = NULL; u32 xtype = xindex & AA_X_TYPE_MASK; diff --git a/security/apparmor/include/apparmor.h b/security/apparmor/include/apparmor.h index 962a20a75e01..829082c35faa 100644 --- a/security/apparmor/include/apparmor.h +++ b/security/apparmor/include/apparmor.h @@ -27,6 +27,7 @@ #define AA_CLASS_NET 4 #define AA_CLASS_RLIMITS 5 #define AA_CLASS_DOMAIN 6 +#define AA_CLASS_MOUNT 7 #define AA_CLASS_PTRACE 9 #define AA_CLASS_SIGNAL 10 #define AA_CLASS_LABEL 16 diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h index d9a156ae11b9..c3fe1c5ef3bc 100644 --- a/security/apparmor/include/audit.h +++ b/security/apparmor/include/audit.h @@ -71,6 +71,10 @@ enum audit_type { #define OP_FMPROT "file_mprotect" #define OP_INHERIT "file_inherit" +#define OP_PIVOTROOT "pivotroot" +#define OP_MOUNT "mount" +#define OP_UMOUNT "umount" + #define OP_CREATE "create" #define OP_POST_CREATE "post_create" #define OP_BIND "bind" @@ -132,6 +136,13 @@ struct apparmor_audit_data { int rlim; unsigned long max; } rlim; + struct { + const char *src_name; + const char *type; + const char *trans; + const char *data; + unsigned long flags; + } mnt; }; }; diff --git a/security/apparmor/include/domain.h b/security/apparmor/include/domain.h index bab5810b6e9a..db27403346c5 100644 --- a/security/apparmor/include/domain.h +++ b/security/apparmor/include/domain.h @@ -15,6 +15,8 @@ #include #include +#include "label.h" + #ifndef __AA_DOMAIN_H #define __AA_DOMAIN_H @@ -29,6 +31,9 @@ struct aa_domain { #define AA_CHANGE_ONEXEC 4 #define AA_CHANGE_STACK 8 +struct aa_label *x_table_lookup(struct aa_profile *profile, u32 xindex, + const char **name); + int apparmor_bprm_set_creds(struct linux_binprm *bprm); int apparmor_bprm_secureexec(struct linux_binprm *bprm); diff --git a/security/apparmor/include/mount.h b/security/apparmor/include/mount.h new file mode 100644 index 000000000000..25d6067fa6ef --- /dev/null +++ b/security/apparmor/include/mount.h @@ -0,0 +1,54 @@ +/* + * AppArmor security module + * + * This file contains AppArmor file mediation function definitions. + * + * Copyright 2017 Canonical Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2 of the + * License. + */ + +#ifndef __AA_MOUNT_H +#define __AA_MOUNT_H + +#include +#include + +#include "domain.h" +#include "policy.h" + +/* mount perms */ +#define AA_MAY_PIVOTROOT 0x01 +#define AA_MAY_MOUNT 0x02 +#define AA_MAY_UMOUNT 0x04 +#define AA_AUDIT_DATA 0x40 +#define AA_MNT_CONT_MATCH 0x40 + +#define AA_MS_IGNORE_MASK (MS_KERNMOUNT | MS_NOSEC | MS_ACTIVE | MS_BORN) + +int aa_remount(struct aa_label *label, const struct path *path, + unsigned long flags, void *data); + +int aa_bind_mount(struct aa_label *label, const struct path *path, + const char *old_name, unsigned long flags); + + +int aa_mount_change_type(struct aa_label *label, const struct path *path, + unsigned long flags); + +int aa_move_mount(struct aa_label *label, const struct path *path, + const char *old_name); + +int aa_new_mount(struct aa_label *label, const char *dev_name, + const struct path *path, const char *type, unsigned long flags, + void *data); + +int aa_umount(struct aa_label *label, struct vfsmount *mnt, int flags); + +int aa_pivotroot(struct aa_label *label, const struct path *old_path, + const struct path *new_path); + +#endif /* __AA_MOUNT_H */ diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index af22f3dfbcce..4ad0b3a45142 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -38,6 +38,7 @@ #include "include/policy.h" #include "include/policy_ns.h" #include "include/procattr.h" +#include "include/mount.h" /* Flag indicating whether initialization completed */ int apparmor_initialized; @@ -511,6 +512,65 @@ static int apparmor_file_mprotect(struct vm_area_struct *vma, !(vma->vm_flags & VM_SHARED) ? MAP_PRIVATE : 0); } +static int apparmor_sb_mount(const char *dev_name, const struct path *path, + const char *type, unsigned long flags, void *data) +{ + struct aa_label *label; + int error = 0; + + /* Discard magic */ + if ((flags & MS_MGC_MSK) == MS_MGC_VAL) + flags &= ~MS_MGC_MSK; + + flags &= ~AA_MS_IGNORE_MASK; + + label = __begin_current_label_crit_section(); + if (!unconfined(label)) { + if (flags & MS_REMOUNT) + error = aa_remount(label, path, flags, data); + else if (flags & MS_BIND) + error = aa_bind_mount(label, path, dev_name, flags); + else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | + MS_UNBINDABLE)) + error = aa_mount_change_type(label, path, flags); + else if (flags & MS_MOVE) + error = aa_move_mount(label, path, dev_name); + else + error = aa_new_mount(label, dev_name, path, type, + flags, data); + } + __end_current_label_crit_section(label); + + return error; +} + +static int apparmor_sb_umount(struct vfsmount *mnt, int flags) +{ + struct aa_label *label; + int error = 0; + + label = __begin_current_label_crit_section(); + if (!unconfined(label)) + error = aa_umount(label, mnt, flags); + __end_current_label_crit_section(label); + + return error; +} + +static int apparmor_sb_pivotroot(const struct path *old_path, + const struct path *new_path) +{ + struct aa_label *label; + int error = 0; + + label = aa_get_current_label(); + if (!unconfined(label)) + error = aa_pivotroot(label, old_path, new_path); + aa_put_label(label); + + return error; +} + static int apparmor_getprocattr(struct task_struct *task, char *name, char **value) { @@ -682,6 +742,10 @@ static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(capget, apparmor_capget), LSM_HOOK_INIT(capable, apparmor_capable), + LSM_HOOK_INIT(sb_mount, apparmor_sb_mount), + LSM_HOOK_INIT(sb_umount, apparmor_sb_umount), + LSM_HOOK_INIT(sb_pivotroot, apparmor_sb_pivotroot), + LSM_HOOK_INIT(path_link, apparmor_path_link), LSM_HOOK_INIT(path_unlink, apparmor_path_unlink), LSM_HOOK_INIT(path_symlink, apparmor_path_symlink), diff --git a/security/apparmor/mount.c b/security/apparmor/mount.c new file mode 100644 index 000000000000..82a64b58041d --- /dev/null +++ b/security/apparmor/mount.c @@ -0,0 +1,696 @@ +/* + * AppArmor security module + * + * This file contains AppArmor mediation of files + * + * Copyright (C) 1998-2008 Novell/SUSE + * Copyright 2009-2017 Canonical Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2 of the + * License. + */ + +#include +#include +#include + +#include "include/apparmor.h" +#include "include/audit.h" +#include "include/context.h" +#include "include/domain.h" +#include "include/file.h" +#include "include/match.h" +#include "include/mount.h" +#include "include/path.h" +#include "include/policy.h" + + +static void audit_mnt_flags(struct audit_buffer *ab, unsigned long flags) +{ + if (flags & MS_RDONLY) + audit_log_format(ab, "ro"); + else + audit_log_format(ab, "rw"); + if (flags & MS_NOSUID) + audit_log_format(ab, ", nosuid"); + if (flags & MS_NODEV) + audit_log_format(ab, ", nodev"); + if (flags & MS_NOEXEC) + audit_log_format(ab, ", noexec"); + if (flags & MS_SYNCHRONOUS) + audit_log_format(ab, ", sync"); + if (flags & MS_REMOUNT) + audit_log_format(ab, ", remount"); + if (flags & MS_MANDLOCK) + audit_log_format(ab, ", mand"); + if (flags & MS_DIRSYNC) + audit_log_format(ab, ", dirsync"); + if (flags & MS_NOATIME) + audit_log_format(ab, ", noatime"); + if (flags & MS_NODIRATIME) + audit_log_format(ab, ", nodiratime"); + if (flags & MS_BIND) + audit_log_format(ab, flags & MS_REC ? ", rbind" : ", bind"); + if (flags & MS_MOVE) + audit_log_format(ab, ", move"); + if (flags & MS_SILENT) + audit_log_format(ab, ", silent"); + if (flags & MS_POSIXACL) + audit_log_format(ab, ", acl"); + if (flags & MS_UNBINDABLE) + audit_log_format(ab, flags & MS_REC ? ", runbindable" : + ", unbindable"); + if (flags & MS_PRIVATE) + audit_log_format(ab, flags & MS_REC ? ", rprivate" : + ", private"); + if (flags & MS_SLAVE) + audit_log_format(ab, flags & MS_REC ? ", rslave" : + ", slave"); + if (flags & MS_SHARED) + audit_log_format(ab, flags & MS_REC ? ", rshared" : + ", shared"); + if (flags & MS_RELATIME) + audit_log_format(ab, ", relatime"); + if (flags & MS_I_VERSION) + audit_log_format(ab, ", iversion"); + if (flags & MS_STRICTATIME) + audit_log_format(ab, ", strictatime"); + if (flags & MS_NOUSER) + audit_log_format(ab, ", nouser"); +} + +/** + * audit_cb - call back for mount specific audit fields + * @ab: audit_buffer (NOT NULL) + * @va: audit struct to audit values of (NOT NULL) + */ +static void audit_cb(struct audit_buffer *ab, void *va) +{ + struct common_audit_data *sa = va; + + if (aad(sa)->mnt.type) { + audit_log_format(ab, " fstype="); + audit_log_untrustedstring(ab, aad(sa)->mnt.type); + } + if (aad(sa)->mnt.src_name) { + audit_log_format(ab, " srcname="); + audit_log_untrustedstring(ab, aad(sa)->mnt.src_name); + } + if (aad(sa)->mnt.trans) { + audit_log_format(ab, " trans="); + audit_log_untrustedstring(ab, aad(sa)->mnt.trans); + } + if (aad(sa)->mnt.flags) { + audit_log_format(ab, " flags=\""); + audit_mnt_flags(ab, aad(sa)->mnt.flags); + audit_log_format(ab, "\""); + } + if (aad(sa)->mnt.data) { + audit_log_format(ab, " options="); + audit_log_untrustedstring(ab, aad(sa)->mnt.data); + } +} + +/** + * audit_mount - handle the auditing of mount operations + * @profile: the profile being enforced (NOT NULL) + * @op: operation being mediated (NOT NULL) + * @name: name of object being mediated (MAYBE NULL) + * @src_name: src_name of object being mediated (MAYBE_NULL) + * @type: type of filesystem (MAYBE_NULL) + * @trans: name of trans (MAYBE NULL) + * @flags: filesystem idependent mount flags + * @data: filesystem mount flags + * @request: permissions requested + * @perms: the permissions computed for the request (NOT NULL) + * @info: extra information message (MAYBE NULL) + * @error: 0 if operation allowed else failure error code + * + * Returns: %0 or error on failure + */ +static int audit_mount(struct aa_profile *profile, const char *op, + const char *name, const char *src_name, + const char *type, const char *trans, + unsigned long flags, const void *data, u32 request, + struct aa_perms *perms, const char *info, int error) +{ + int audit_type = AUDIT_APPARMOR_AUTO; + DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, op); + + if (likely(!error)) { + u32 mask = perms->audit; + + if (unlikely(AUDIT_MODE(profile) == AUDIT_ALL)) + mask = 0xffff; + + /* mask off perms that are not being force audited */ + request &= mask; + + if (likely(!request)) + return 0; + audit_type = AUDIT_APPARMOR_AUDIT; + } else { + /* only report permissions that were denied */ + request = request & ~perms->allow; + + if (request & perms->kill) + audit_type = AUDIT_APPARMOR_KILL; + + /* quiet known rejects, assumes quiet and kill do not overlap */ + if ((request & perms->quiet) && + AUDIT_MODE(profile) != AUDIT_NOQUIET && + AUDIT_MODE(profile) != AUDIT_ALL) + request &= ~perms->quiet; + + if (!request) + return error; + } + + aad(&sa)->name = name; + aad(&sa)->mnt.src_name = src_name; + aad(&sa)->mnt.type = type; + aad(&sa)->mnt.trans = trans; + aad(&sa)->mnt.flags = flags; + if (data && (perms->audit & AA_AUDIT_DATA)) + aad(&sa)->mnt.data = data; + aad(&sa)->info = info; + aad(&sa)->error = error; + + return aa_audit(audit_type, profile, &sa, audit_cb); +} + +/** + * match_mnt_flags - Do an ordered match on mount flags + * @dfa: dfa to match against + * @state: state to start in + * @flags: mount flags to match against + * + * Mount flags are encoded as an ordered match. This is done instead of + * checking against a simple bitmask, to allow for logical operations + * on the flags. + * + * Returns: next state after flags match + */ +static unsigned int match_mnt_flags(struct aa_dfa *dfa, unsigned int state, + unsigned long flags) +{ + unsigned int i; + + for (i = 0; i <= 31 ; ++i) { + if ((1 << i) & flags) + state = aa_dfa_next(dfa, state, i + 1); + } + + return state; +} + +/** + * compute_mnt_perms - compute mount permission associated with @state + * @dfa: dfa to match against (NOT NULL) + * @state: state match finished in + * + * Returns: mount permissions + */ +static struct aa_perms compute_mnt_perms(struct aa_dfa *dfa, + unsigned int state) +{ + struct aa_perms perms; + + perms.kill = 0; + perms.allow = dfa_user_allow(dfa, state); + perms.audit = dfa_user_audit(dfa, state); + perms.quiet = dfa_user_quiet(dfa, state); + perms.xindex = dfa_user_xindex(dfa, state); + + return perms; +} + +static const char * const mnt_info_table[] = { + "match succeeded", + "failed mntpnt match", + "failed srcname match", + "failed type match", + "failed flags match", + "failed data match" +}; + +/* + * Returns 0 on success else element that match failed in, this is the + * index into the mnt_info_table above + */ +static int do_match_mnt(struct aa_dfa *dfa, unsigned int start, + const char *mntpnt, const char *devname, + const char *type, unsigned long flags, + void *data, bool binary, struct aa_perms *perms) +{ + unsigned int state; + + AA_BUG(!dfa); + AA_BUG(!perms); + + state = aa_dfa_match(dfa, start, mntpnt); + state = aa_dfa_null_transition(dfa, state); + if (!state) + return 1; + + if (devname) + state = aa_dfa_match(dfa, state, devname); + state = aa_dfa_null_transition(dfa, state); + if (!state) + return 2; + + if (type) + state = aa_dfa_match(dfa, state, type); + state = aa_dfa_null_transition(dfa, state); + if (!state) + return 3; + + state = match_mnt_flags(dfa, state, flags); + if (!state) + return 4; + *perms = compute_mnt_perms(dfa, state); + if (perms->allow & AA_MAY_MOUNT) + return 0; + + /* only match data if not binary and the DFA flags data is expected */ + if (data && !binary && (perms->allow & AA_MNT_CONT_MATCH)) { + state = aa_dfa_null_transition(dfa, state); + if (!state) + return 4; + + state = aa_dfa_match(dfa, state, data); + if (!state) + return 5; + *perms = compute_mnt_perms(dfa, state); + if (perms->allow & AA_MAY_MOUNT) + return 0; + } + + /* failed at end of flags match */ + return 4; +} + + +static int path_flags(struct aa_profile *profile, const struct path *path) +{ + AA_BUG(!profile); + AA_BUG(!path); + + return profile->path_flags | + (S_ISDIR(path->dentry->d_inode->i_mode) ? PATH_IS_DIR : 0); +} + +/** + * match_mnt_path_str - handle path matching for mount + * @profile: the confining profile + * @mntpath: for the mntpnt (NOT NULL) + * @buffer: buffer to be used to lookup mntpath + * @devnme: string for the devname/src_name (MAY BE NULL OR ERRPTR) + * @type: string for the dev type (MAYBE NULL) + * @flags: mount flags to match + * @data: fs mount data (MAYBE NULL) + * @binary: whether @data is binary + * @devinfo: error str if (IS_ERR(@devname)) + * + * Returns: 0 on success else error + */ +static int match_mnt_path_str(struct aa_profile *profile, + const struct path *mntpath, char *buffer, + const char *devname, const char *type, + unsigned long flags, void *data, bool binary, + const char *devinfo) +{ + struct aa_perms perms = { }; + const char *mntpnt = NULL, *info = NULL; + int pos, error; + + AA_BUG(!profile); + AA_BUG(!mntpath); + AA_BUG(!buffer); + + error = aa_path_name(mntpath, path_flags(profile, mntpath), buffer, + &mntpnt, &info, profile->disconnected); + if (error) + goto audit; + if (IS_ERR(devname)) { + error = PTR_ERR(devname); + devname = NULL; + info = devinfo; + goto audit; + } + + error = -EACCES; + pos = do_match_mnt(profile->policy.dfa, + profile->policy.start[AA_CLASS_MOUNT], + mntpnt, devname, type, flags, data, binary, &perms); + if (pos) { + info = mnt_info_table[pos]; + goto audit; + } + error = 0; + +audit: + return audit_mount(profile, OP_MOUNT, mntpnt, devname, type, NULL, + flags, data, AA_MAY_MOUNT, &perms, info, error); +} + +/** + * match_mnt - handle path matching for mount + * @profile: the confining profile + * @mntpath: for the mntpnt (NOT NULL) + * @buffer: buffer to be used to lookup mntpath + * @devpath: path devname/src_name (MAYBE NULL) + * @devbuffer: buffer to be used to lookup devname/src_name + * @type: string for the dev type (MAYBE NULL) + * @flags: mount flags to match + * @data: fs mount data (MAYBE NULL) + * @binary: whether @data is binary + * + * Returns: 0 on success else error + */ +static int match_mnt(struct aa_profile *profile, const struct path *path, + char *buffer, struct path *devpath, char *devbuffer, + const char *type, unsigned long flags, void *data, + bool binary) +{ + const char *devname = NULL, *info = NULL; + int error = -EACCES; + + AA_BUG(!profile); + AA_BUG(devpath && !devbuffer); + + if (devpath) { + error = aa_path_name(devpath, path_flags(profile, devpath), + devbuffer, &devname, &info, + profile->disconnected); + if (error) + devname = ERR_PTR(error); + } + + return match_mnt_path_str(profile, path, buffer, devname, type, flags, + data, binary, info); +} + +int aa_remount(struct aa_label *label, const struct path *path, + unsigned long flags, void *data) +{ + struct aa_profile *profile; + char *buffer = NULL; + bool binary; + int error; + + AA_BUG(!label); + AA_BUG(!path); + + binary = path->dentry->d_sb->s_type->fs_flags & FS_BINARY_MOUNTDATA; + + get_buffers(buffer); + error = fn_for_each_confined(label, profile, + match_mnt(profile, path, buffer, NULL, NULL, NULL, + flags, data, binary)); + put_buffers(buffer); + + return error; +} + +int aa_bind_mount(struct aa_label *label, const struct path *path, + const char *dev_name, unsigned long flags) +{ + struct aa_profile *profile; + char *buffer = NULL, *old_buffer = NULL; + struct path old_path; + int error; + + AA_BUG(!label); + AA_BUG(!path); + + if (!dev_name || !*dev_name) + return -EINVAL; + + flags &= MS_REC | MS_BIND; + + error = kern_path(dev_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path); + if (error) + return error; + + get_buffers(buffer, old_buffer); + error = fn_for_each_confined(label, profile, + match_mnt(profile, path, buffer, &old_path, old_buffer, + NULL, flags, NULL, false)); + put_buffers(buffer, old_buffer); + path_put(&old_path); + + return error; +} + +int aa_mount_change_type(struct aa_label *label, const struct path *path, + unsigned long flags) +{ + struct aa_profile *profile; + char *buffer = NULL; + int error; + + AA_BUG(!label); + AA_BUG(!path); + + /* These are the flags allowed by do_change_type() */ + flags &= (MS_REC | MS_SILENT | MS_SHARED | MS_PRIVATE | MS_SLAVE | + MS_UNBINDABLE); + + get_buffers(buffer); + error = fn_for_each_confined(label, profile, + match_mnt(profile, path, buffer, NULL, NULL, NULL, + flags, NULL, false)); + put_buffers(buffer); + + return error; +} + +int aa_move_mount(struct aa_label *label, const struct path *path, + const char *orig_name) +{ + struct aa_profile *profile; + char *buffer = NULL, *old_buffer = NULL; + struct path old_path; + int error; + + AA_BUG(!label); + AA_BUG(!path); + + if (!orig_name || !*orig_name) + return -EINVAL; + + error = kern_path(orig_name, LOOKUP_FOLLOW, &old_path); + if (error) + return error; + + get_buffers(buffer, old_buffer); + error = fn_for_each_confined(label, profile, + match_mnt(profile, path, buffer, &old_path, old_buffer, + NULL, MS_MOVE, NULL, false)); + put_buffers(buffer, old_buffer); + path_put(&old_path); + + return error; +} + +int aa_new_mount(struct aa_label *label, const char *dev_name, + const struct path *path, const char *type, unsigned long flags, + void *data) +{ + struct aa_profile *profile; + char *buffer = NULL, *dev_buffer = NULL; + bool binary = true; + int error; + int requires_dev = 0; + struct path tmp_path, *dev_path = NULL; + + AA_BUG(!label); + AA_BUG(!path); + + if (type) { + struct file_system_type *fstype; + + fstype = get_fs_type(type); + if (!fstype) + return -ENODEV; + binary = fstype->fs_flags & FS_BINARY_MOUNTDATA; + requires_dev = fstype->fs_flags & FS_REQUIRES_DEV; + put_filesystem(fstype); + + if (requires_dev) { + if (!dev_name || !*dev_name) + return -ENOENT; + + error = kern_path(dev_name, LOOKUP_FOLLOW, &tmp_path); + if (error) + return error; + dev_path = &tmp_path; + } + } + + get_buffers(buffer, dev_buffer); + if (dev_path) { + error = fn_for_each_confined(label, profile, + match_mnt(profile, path, buffer, dev_path, dev_buffer, + type, flags, data, binary)); + } else { + error = fn_for_each_confined(label, profile, + match_mnt_path_str(profile, path, buffer, dev_name, + type, flags, data, binary, NULL)); + } + put_buffers(buffer, dev_buffer); + if (dev_path) + path_put(dev_path); + + return error; +} + +static int profile_umount(struct aa_profile *profile, struct path *path, + char *buffer) +{ + struct aa_perms perms = { }; + const char *name = NULL, *info = NULL; + unsigned int state; + int error; + + AA_BUG(!profile); + AA_BUG(!path); + + error = aa_path_name(path, path_flags(profile, path), buffer, &name, + &info, profile->disconnected); + if (error) + goto audit; + + state = aa_dfa_match(profile->policy.dfa, + profile->policy.start[AA_CLASS_MOUNT], + name); + perms = compute_mnt_perms(profile->policy.dfa, state); + if (AA_MAY_UMOUNT & ~perms.allow) + error = -EACCES; + +audit: + return audit_mount(profile, OP_UMOUNT, name, NULL, NULL, NULL, 0, NULL, + AA_MAY_UMOUNT, &perms, info, error); +} + +int aa_umount(struct aa_label *label, struct vfsmount *mnt, int flags) +{ + struct aa_profile *profile; + char *buffer = NULL; + int error; + struct path path = { .mnt = mnt, .dentry = mnt->mnt_root }; + + AA_BUG(!label); + AA_BUG(!mnt); + + get_buffers(buffer); + error = fn_for_each_confined(label, profile, + profile_umount(profile, &path, buffer)); + put_buffers(buffer); + + return error; +} + +/* helper fn for transition on pivotroot + * + * Returns: label for transition or ERR_PTR. Does not return NULL + */ +static struct aa_label *build_pivotroot(struct aa_profile *profile, + const struct path *new_path, + char *new_buffer, + const struct path *old_path, + char *old_buffer) +{ + const char *old_name, *new_name = NULL, *info = NULL; + const char *trans_name = NULL; + struct aa_perms perms = { }; + unsigned int state; + int error; + + AA_BUG(!profile); + AA_BUG(!new_path); + AA_BUG(!old_path); + + if (profile_unconfined(profile)) + return aa_get_newest_label(&profile->label); + + error = aa_path_name(old_path, path_flags(profile, old_path), + old_buffer, &old_name, &info, + profile->disconnected); + if (error) + goto audit; + error = aa_path_name(new_path, path_flags(profile, new_path), + new_buffer, &new_name, &info, + profile->disconnected); + if (error) + goto audit; + + error = -EACCES; + state = aa_dfa_match(profile->policy.dfa, + profile->policy.start[AA_CLASS_MOUNT], + new_name); + state = aa_dfa_null_transition(profile->policy.dfa, state); + state = aa_dfa_match(profile->policy.dfa, state, old_name); + perms = compute_mnt_perms(profile->policy.dfa, state); + + if (AA_MAY_PIVOTROOT & perms.allow) + error = 0; + +audit: + error = audit_mount(profile, OP_PIVOTROOT, new_name, old_name, + NULL, trans_name, 0, NULL, AA_MAY_PIVOTROOT, + &perms, info, error); + if (error) + return ERR_PTR(error); + + return aa_get_newest_label(&profile->label); +} + +int aa_pivotroot(struct aa_label *label, const struct path *old_path, + const struct path *new_path) +{ + struct aa_profile *profile; + struct aa_label *target = NULL; + char *old_buffer = NULL, *new_buffer = NULL, *info = NULL; + int error; + + AA_BUG(!label); + AA_BUG(!old_path); + AA_BUG(!new_path); + + get_buffers(old_buffer, new_buffer); + target = fn_label_build(label, profile, GFP_ATOMIC, + build_pivotroot(profile, new_path, new_buffer, + old_path, old_buffer)); + if (!target) { + info = "label build failed"; + error = -ENOMEM; + goto fail; + } else if (!IS_ERR(target)) { + error = aa_replace_current_label(target); + if (error) { + /* TODO: audit target */ + aa_put_label(target); + goto out; + } + } else + /* already audited error */ + error = PTR_ERR(target); +out: + put_buffers(old_buffer, new_buffer); + + return error; + +fail: + /* TODO: add back in auditing of new_name and old_name */ + error = fn_for_each(label, profile, + audit_mount(profile, OP_PIVOTROOT, NULL /*new_name */, + NULL /* old_name */, + NULL, NULL, + 0, NULL, AA_MAY_PIVOTROOT, &nullperms, info, + error)); + goto out; +} From f872af75d325cc449b6621a0d30a4f2ba77dd092 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Sun, 6 Aug 2017 05:36:40 -0700 Subject: [PATCH 259/279] apparmor: cleanup conditional check for label in label_print Signed-off-by: John Johansen Acked-by: Seth Arnold --- security/apparmor/label.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/security/apparmor/label.c b/security/apparmor/label.c index e324f4df3e34..38be7a89cc31 100644 --- a/security/apparmor/label.c +++ b/security/apparmor/label.c @@ -1450,9 +1450,11 @@ bool aa_update_label_name(struct aa_ns *ns, struct aa_label *label, gfp_t gfp) * cached label name is present and visible * @label->hname only exists if label is namespace hierachical */ -static inline bool use_label_hname(struct aa_ns *ns, struct aa_label *label) +static inline bool use_label_hname(struct aa_ns *ns, struct aa_label *label, + int flags) { - if (label->hname && labels_ns(label) == ns) + if (label->hname && (!ns || labels_ns(label) == ns) && + !(flags & ~FLAG_SHOW_MODE)) return true; return false; @@ -1710,10 +1712,8 @@ void aa_label_xaudit(struct audit_buffer *ab, struct aa_ns *ns, AA_BUG(!ab); AA_BUG(!label); - if (!ns) - ns = labels_ns(label); - - if (!use_label_hname(ns, label) || display_mode(ns, label, flags)) { + if (!use_label_hname(ns, label, flags) || + display_mode(ns, label, flags)) { len = aa_label_asxprint(&name, ns, label, flags, gfp); if (len == -1) { AA_DEBUG("label print error"); @@ -1738,10 +1738,7 @@ void aa_label_seq_xprint(struct seq_file *f, struct aa_ns *ns, AA_BUG(!f); AA_BUG(!label); - if (!ns) - ns = labels_ns(label); - - if (!use_label_hname(ns, label)) { + if (!use_label_hname(ns, label, flags)) { char *str; int len; @@ -1764,10 +1761,7 @@ void aa_label_xprintk(struct aa_ns *ns, struct aa_label *label, int flags, { AA_BUG(!label); - if (!ns) - ns = labels_ns(label); - - if (!use_label_hname(ns, label)) { + if (!use_label_hname(ns, label, flags)) { char *str; int len; From 26b7899510ae243e392960704ebdba52d05fbb13 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Sun, 6 Aug 2017 05:39:08 -0700 Subject: [PATCH 260/279] apparmor: add support for absolute root view based labels With apparmor policy virtualization based on policy namespace View's we don't generally want/need absolute root based views, however there are cases like debugging and some secid based conversions where using a root based view is important. Signed-off-by: John Johansen Acked-by: Seth Arnold --- security/apparmor/include/label.h | 1 + security/apparmor/label.c | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/security/apparmor/include/label.h b/security/apparmor/include/label.h index 9a283b722755..af22dcbbcb8a 100644 --- a/security/apparmor/include/label.h +++ b/security/apparmor/include/label.h @@ -310,6 +310,7 @@ bool aa_update_label_name(struct aa_ns *ns, struct aa_label *label, gfp_t gfp); #define FLAG_SHOW_MODE 1 #define FLAG_VIEW_SUBNS 2 #define FLAG_HIDDEN_UNCONFINED 4 +#define FLAG_ABS_ROOT 8 int aa_label_snxprint(char *str, size_t size, struct aa_ns *view, struct aa_label *label, int flags); int aa_label_asxprint(char **strp, struct aa_ns *ns, struct aa_label *label, diff --git a/security/apparmor/label.c b/security/apparmor/label.c index 38be7a89cc31..52b4ef14840d 100644 --- a/security/apparmor/label.c +++ b/security/apparmor/label.c @@ -1607,8 +1607,13 @@ int aa_label_snxprint(char *str, size_t size, struct aa_ns *ns, AA_BUG(!str && size != 0); AA_BUG(!label); - if (!ns) + if (flags & FLAG_ABS_ROOT) { + ns = root_ns; + len = snprintf(str, size, "="); + update_for_len(total, len, size, str); + } else if (!ns) { ns = labels_ns(label); + } label_for_each(i, label, profile) { if (aa_ns_visible(ns, profile->ns, flags & FLAG_VIEW_SUBNS)) { @@ -1868,6 +1873,9 @@ struct aa_label *aa_label_parse(struct aa_label *base, const char *str, if (*str == '&') str++; } + if (*str == '=') + base = &root_ns->unconfined->label; + error = vec_setup(profile, vec, len, gfp); if (error) return ERR_PTR(error); From 2410aa96d6b4930ed25fd02c3d173f14b962e0f4 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Tue, 18 Jul 2017 23:37:18 -0700 Subject: [PATCH 261/279] apparmor: make policy_unpack able to audit different info messages Switch unpack auditing to using the generic name field in the audit struct and make it so we can start adding new info messages about why an unpack failed. Signed-off-by: John Johansen Acked-by: Seth Arnold --- security/apparmor/include/audit.h | 4 +-- security/apparmor/policy_unpack.c | 52 ++++++++++++++++++++++--------- 2 files changed, 40 insertions(+), 16 deletions(-) diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h index c3fe1c5ef3bc..620e81169659 100644 --- a/security/apparmor/include/audit.h +++ b/security/apparmor/include/audit.h @@ -127,9 +127,9 @@ struct apparmor_audit_data { } fs; }; struct { - const char *name; - long pos; + struct aa_profile *profile; const char *ns; + long pos; } iface; int signal; struct { diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index bda0dce3b582..4ede87c30f8b 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -85,9 +85,9 @@ static void audit_cb(struct audit_buffer *ab, void *va) audit_log_format(ab, " ns="); audit_log_untrustedstring(ab, aad(sa)->iface.ns); } - if (aad(sa)->iface.name) { + if (aad(sa)->name) { audit_log_format(ab, " name="); - audit_log_untrustedstring(ab, aad(sa)->iface.name); + audit_log_untrustedstring(ab, aad(sa)->name); } if (aad(sa)->iface.pos) audit_log_format(ab, " offset=%ld", aad(sa)->iface.pos); @@ -114,9 +114,9 @@ static int audit_iface(struct aa_profile *new, const char *ns_name, aad(&sa)->iface.pos = e->pos - e->start; aad(&sa)->iface.ns = ns_name; if (new) - aad(&sa)->iface.name = new->base.hname; + aad(&sa)->name = new->base.hname; else - aad(&sa)->iface.name = name; + aad(&sa)->name = name; aad(&sa)->info = info; aad(&sa)->error = error; @@ -583,6 +583,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) { struct aa_profile *profile = NULL; const char *tmpname, *tmpns = NULL, *name = NULL; + const char *info = "failed to unpack profile"; size_t ns_len; struct rhashtable_params params = { 0 }; char *key = NULL; @@ -604,8 +605,10 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) tmpname = aa_splitn_fqname(name, strlen(name), &tmpns, &ns_len); if (tmpns) { *ns_name = kstrndup(tmpns, ns_len, GFP_KERNEL); - if (!*ns_name) + if (!*ns_name) { + info = "out of memory"; goto fail; + } name = tmpname; } @@ -624,12 +627,15 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) if (IS_ERR(profile->xmatch)) { error = PTR_ERR(profile->xmatch); profile->xmatch = NULL; + info = "bad xmatch"; goto fail; } /* xmatch_len is not optional if xmatch is set */ if (profile->xmatch) { - if (!unpack_u32(e, &tmp, NULL)) + if (!unpack_u32(e, &tmp, NULL)) { + info = "missing xmatch len"; goto fail; + } profile->xmatch_len = tmp; } @@ -637,8 +643,11 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) (void) unpack_str(e, &profile->disconnected, "disconnected"); /* per profile debug flags (complain, audit) */ - if (!unpack_nameX(e, AA_STRUCT, "flags")) + if (!unpack_nameX(e, AA_STRUCT, "flags")) { + info = "profile missing flags"; goto fail; + } + info = "failed to unpack profile flags"; if (!unpack_u32(e, &tmp, NULL)) goto fail; if (tmp & PACKED_FLAG_HAT) @@ -667,6 +676,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) /* set a default value if path_flags field is not present */ profile->path_flags = PATH_MEDIATE_DELETED; + info = "failed to unpack profile capabilities"; if (!unpack_u32(e, &(profile->caps.allow.cap[0]), NULL)) goto fail; if (!unpack_u32(e, &(profile->caps.audit.cap[0]), NULL)) @@ -676,6 +686,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) if (!unpack_u32(e, &tmpcap.cap[0], NULL)) goto fail; + info = "failed to unpack upper profile capabilities"; if (unpack_nameX(e, AA_STRUCT, "caps64")) { /* optional upper half of 64 bit caps */ if (!unpack_u32(e, &(profile->caps.allow.cap[1]), NULL)) @@ -690,6 +701,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) goto fail; } + info = "failed to unpack extended profile capabilities"; if (unpack_nameX(e, AA_STRUCT, "capsx")) { /* optional extended caps mediation mask */ if (!unpack_u32(e, &(profile->caps.extended.cap[0]), NULL)) @@ -700,11 +712,14 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) goto fail; } - if (!unpack_rlimits(e, profile)) + if (!unpack_rlimits(e, profile)) { + info = "failed to unpack profile rlimits"; goto fail; + } if (unpack_nameX(e, AA_STRUCT, "policydb")) { /* generic policy dfa - optional and may be NULL */ + info = "failed to unpack policydb"; profile->policy.dfa = unpack_dfa(e); if (IS_ERR(profile->policy.dfa)) { error = PTR_ERR(profile->policy.dfa); @@ -734,6 +749,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) if (IS_ERR(profile->file.dfa)) { error = PTR_ERR(profile->file.dfa); profile->file.dfa = NULL; + info = "failed to unpack profile file rules"; goto fail; } else if (profile->file.dfa) { if (!unpack_u32(e, &profile->file.start, "dfa_start")) @@ -746,10 +762,13 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) } else profile->file.dfa = aa_get_dfa(nulldfa); - if (!unpack_trans_table(e, profile)) + if (!unpack_trans_table(e, profile)) { + info = "failed to unpack profile transition table"; goto fail; + } if (unpack_nameX(e, AA_STRUCT, "data")) { + info = "out of memory"; profile->data = kzalloc(sizeof(*profile->data), GFP_KERNEL); if (!profile->data) goto fail; @@ -761,8 +780,10 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) params.hashfn = strhash; params.obj_cmpfn = datacmp; - if (rhashtable_init(profile->data, ¶ms)) + if (rhashtable_init(profile->data, ¶ms)) { + info = "failed to init key, value hash table"; goto fail; + } while (unpack_strdup(e, &key, NULL)) { data = kzalloc(sizeof(*data), GFP_KERNEL); @@ -784,12 +805,16 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) profile->data->p); } - if (!unpack_nameX(e, AA_STRUCTEND, NULL)) + if (!unpack_nameX(e, AA_STRUCTEND, NULL)) { + info = "failed to unpack end of key, value data table"; goto fail; + } } - if (!unpack_nameX(e, AA_STRUCTEND, NULL)) + if (!unpack_nameX(e, AA_STRUCTEND, NULL)) { + info = "failed to unpack end of profile"; goto fail; + } return profile; @@ -798,8 +823,7 @@ fail: name = NULL; else if (!name) name = "unknown"; - audit_iface(profile, NULL, name, "failed to unpack profile", e, - error); + audit_iface(profile, NULL, name, info, e, error); aa_free_profile(profile); return ERR_PTR(error); From cbf2d0e1a9e4876046a628e0e036a7545a3a4c40 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Tue, 18 Jul 2017 23:41:13 -0700 Subject: [PATCH 262/279] apparmor: add more debug asserts to apparmorfs Signed-off-by: John Johansen Acked-by: Seth Arnold --- security/apparmor/apparmorfs.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 8fa6c898c44b..7acea14c850b 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -1446,6 +1446,10 @@ void __aafs_profile_migrate_dents(struct aa_profile *old, { int i; + AA_BUG(!old); + AA_BUG(!new); + AA_BUG(!mutex_is_locked(&profiles_ns(old)->lock)); + for (i = 0; i < AAFS_PROF_SIZEOF; i++) { new->dents[i] = old->dents[i]; if (new->dents[i]) @@ -1509,6 +1513,9 @@ int __aafs_profile_mkdir(struct aa_profile *profile, struct dentry *parent) struct dentry *dent = NULL, *dir; int error; + AA_BUG(!profile); + AA_BUG(!mutex_is_locked(&profiles_ns(profile)->lock)); + if (!parent) { struct aa_profile *p; p = aa_deref_parent(profile); @@ -1734,6 +1741,7 @@ void __aafs_ns_rmdir(struct aa_ns *ns) if (!ns) return; + AA_BUG(!mutex_is_locked(&ns->lock)); list_for_each_entry(child, &ns->base.profiles, base.list) __aafs_profile_rmdir(child); @@ -1906,6 +1914,10 @@ static struct aa_ns *__next_ns(struct aa_ns *root, struct aa_ns *ns) { struct aa_ns *parent, *next; + AA_BUG(!root); + AA_BUG(!ns); + AA_BUG(ns != root && !mutex_is_locked(&ns->parent->lock)); + /* is next namespace a child */ if (!list_empty(&ns->sub_ns)) { next = list_first_entry(&ns->sub_ns, typeof(*ns), base.list); @@ -1940,6 +1952,9 @@ static struct aa_ns *__next_ns(struct aa_ns *root, struct aa_ns *ns) static struct aa_profile *__first_profile(struct aa_ns *root, struct aa_ns *ns) { + AA_BUG(!root); + AA_BUG(ns && !mutex_is_locked(&ns->lock)); + for (; ns; ns = __next_ns(root, ns)) { if (!list_empty(&ns->base.profiles)) return list_first_entry(&ns->base.profiles, @@ -1962,6 +1977,8 @@ static struct aa_profile *__next_profile(struct aa_profile *p) struct aa_profile *parent; struct aa_ns *ns = p->ns; + AA_BUG(!mutex_is_locked(&profiles_ns(p)->lock)); + /* is next profile a child */ if (!list_empty(&p->base.profiles)) return list_first_entry(&p->base.profiles, typeof(*p), From 651e28c5537abb39076d3949fb7618536f1d242e Mon Sep 17 00:00:00 2001 From: John Johansen Date: Tue, 18 Jul 2017 23:18:33 -0700 Subject: [PATCH 263/279] apparmor: add base infastructure for socket mediation Provide a basic mediation of sockets. This is not a full net mediation but just whether a spcific family of socket can be used by an application, along with setting up some basic infrastructure for network mediation to follow. the user space rule hav the basic form of NETWORK RULE = [ QUALIFIERS ] 'network' [ DOMAIN ] [ TYPE | PROTOCOL ] DOMAIN = ( 'inet' | 'ax25' | 'ipx' | 'appletalk' | 'netrom' | 'bridge' | 'atmpvc' | 'x25' | 'inet6' | 'rose' | 'netbeui' | 'security' | 'key' | 'packet' | 'ash' | 'econet' | 'atmsvc' | 'sna' | 'irda' | 'pppox' | 'wanpipe' | 'bluetooth' | 'netlink' | 'unix' | 'rds' | 'llc' | 'can' | 'tipc' | 'iucv' | 'rxrpc' | 'isdn' | 'phonet' | 'ieee802154' | 'caif' | 'alg' | 'nfc' | 'vsock' | 'mpls' | 'ib' | 'kcm' ) ',' TYPE = ( 'stream' | 'dgram' | 'seqpacket' | 'rdm' | 'raw' | 'packet' ) PROTOCOL = ( 'tcp' | 'udp' | 'icmp' ) eg. network, network inet, Signed-off-by: John Johansen Acked-by: Seth Arnold --- security/apparmor/.gitignore | 1 + security/apparmor/Makefile | 43 +++- security/apparmor/apparmorfs.c | 1 + security/apparmor/file.c | 30 +++ security/apparmor/include/audit.h | 26 +- security/apparmor/include/net.h | 114 +++++++++ security/apparmor/include/perms.h | 5 +- security/apparmor/include/policy.h | 13 + security/apparmor/lib.c | 5 +- security/apparmor/lsm.c | 387 +++++++++++++++++++++++++++++ security/apparmor/net.c | 184 ++++++++++++++ security/apparmor/policy_unpack.c | 47 +++- 12 files changed, 840 insertions(+), 16 deletions(-) create mode 100644 security/apparmor/include/net.h create mode 100644 security/apparmor/net.c diff --git a/security/apparmor/.gitignore b/security/apparmor/.gitignore index 9cdec70d72b8..d5b291e94264 100644 --- a/security/apparmor/.gitignore +++ b/security/apparmor/.gitignore @@ -1,5 +1,6 @@ # # Generated include files # +net_names.h capability_names.h rlim_names.h diff --git a/security/apparmor/Makefile b/security/apparmor/Makefile index 81a34426d024..dafdd387d42b 100644 --- a/security/apparmor/Makefile +++ b/security/apparmor/Makefile @@ -4,11 +4,44 @@ obj-$(CONFIG_SECURITY_APPARMOR) += apparmor.o apparmor-y := apparmorfs.o audit.o capability.o context.o ipc.o lib.o match.o \ path.o domain.o policy.o policy_unpack.o procattr.o lsm.o \ - resource.o secid.o file.o policy_ns.o label.o mount.o + resource.o secid.o file.o policy_ns.o label.o mount.o net.o apparmor-$(CONFIG_SECURITY_APPARMOR_HASH) += crypto.o -clean-files := capability_names.h rlim_names.h +clean-files := capability_names.h rlim_names.h net_names.h +# Build a lower case string table of address family names +# Transform lines from +# #define AF_LOCAL 1 /* POSIX name for AF_UNIX */ +# #define AF_INET 2 /* Internet IP Protocol */ +# to +# [1] = "local", +# [2] = "inet", +# +# and build the securityfs entries for the mapping. +# Transforms lines from +# #define AF_INET 2 /* Internet IP Protocol */ +# to +# #define AA_SFS_AF_MASK "local inet" +quiet_cmd_make-af = GEN $@ +cmd_make-af = echo "static const char *address_family_names[] = {" > $@ ;\ + sed $< >>$@ -r -n -e "/AF_MAX/d" -e "/AF_LOCAL/d" -e "/AF_ROUTE/d" -e \ + 's/^\#define[ \t]+AF_([A-Z0-9_]+)[ \t]+([0-9]+)(.*)/[\2] = "\L\1",/p';\ + echo "};" >> $@ ;\ + printf '%s' '\#define AA_SFS_AF_MASK "' >> $@ ;\ + sed -r -n -e "/AF_MAX/d" -e "/AF_LOCAL/d" -e "/AF_ROUTE/d" -e \ + 's/^\#define[ \t]+AF_([A-Z0-9_]+)[ \t]+([0-9]+)(.*)/\L\1/p'\ + $< | tr '\n' ' ' | sed -e 's/ $$/"\n/' >> $@ + +# Build a lower case string table of sock type names +# Transform lines from +# SOCK_STREAM = 1, +# to +# [1] = "stream", +quiet_cmd_make-sock = GEN $@ +cmd_make-sock = echo "static const char *sock_type_names[] = {" >> $@ ;\ + sed $^ >>$@ -r -n \ + -e 's/^\tSOCK_([A-Z0-9_]+)[\t]+=[ \t]+([0-9]+)(.*)/[\2] = "\L\1",/p';\ + echo "};" >> $@ # Build a lower case string table of capability names # Transforms lines from @@ -61,6 +94,7 @@ cmd_make-rlim = echo "static const char *const rlim_names[RLIM_NLIMITS] = {" \ tr '\n' ' ' | sed -e 's/ $$/"\n/' >> $@ $(obj)/capability.o : $(obj)/capability_names.h +$(obj)/net.o : $(obj)/net_names.h $(obj)/resource.o : $(obj)/rlim_names.h $(obj)/capability_names.h : $(srctree)/include/uapi/linux/capability.h \ $(src)/Makefile @@ -68,3 +102,8 @@ $(obj)/capability_names.h : $(srctree)/include/uapi/linux/capability.h \ $(obj)/rlim_names.h : $(srctree)/include/uapi/asm-generic/resource.h \ $(src)/Makefile $(call cmd,make-rlim) +$(obj)/net_names.h : $(srctree)/include/linux/socket.h \ + $(srctree)/include/linux/net.h \ + $(src)/Makefile + $(call cmd,make-af) + $(call cmd,make-sock) diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 7acea14c850b..125dad5c3fde 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -2202,6 +2202,7 @@ static struct aa_sfs_entry aa_sfs_entry_features[] = { AA_SFS_DIR("policy", aa_sfs_entry_policy), AA_SFS_DIR("domain", aa_sfs_entry_domain), AA_SFS_DIR("file", aa_sfs_entry_file), + AA_SFS_DIR("network", aa_sfs_entry_network), AA_SFS_DIR("mount", aa_sfs_entry_mount), AA_SFS_DIR("namespaces", aa_sfs_entry_ns), AA_SFS_FILE_U64("capability", VFS_CAP_FLAGS_MASK), diff --git a/security/apparmor/file.c b/security/apparmor/file.c index 3382518b87fa..db80221891c6 100644 --- a/security/apparmor/file.c +++ b/security/apparmor/file.c @@ -21,6 +21,7 @@ #include "include/context.h" #include "include/file.h" #include "include/match.h" +#include "include/net.h" #include "include/path.h" #include "include/policy.h" #include "include/label.h" @@ -566,6 +567,32 @@ static int __file_path_perm(const char *op, struct aa_label *label, return error; } +static int __file_sock_perm(const char *op, struct aa_label *label, + struct aa_label *flabel, struct file *file, + u32 request, u32 denied) +{ + struct socket *sock = (struct socket *) file->private_data; + int error; + + AA_BUG(!sock); + + /* revalidation due to label out of date. No revocation at this time */ + if (!denied && aa_label_is_subset(flabel, label)) + return 0; + + /* TODO: improve to skip profiles cached in flabel */ + error = aa_sock_file_perm(label, op, request, sock); + if (denied) { + /* TODO: improve to skip profiles checked above */ + /* check every profile in file label to is cached */ + last_error(error, aa_sock_file_perm(flabel, op, request, sock)); + } + if (!error) + update_file_ctx(file_ctx(file), label, request); + + return error; +} + /** * aa_file_perm - do permission revalidation check & audit for @file * @op: operation being checked @@ -610,6 +637,9 @@ int aa_file_perm(const char *op, struct aa_label *label, struct file *file, error = __file_path_perm(op, label, flabel, file, request, denied); + else if (S_ISSOCK(file_inode(file)->i_mode)) + error = __file_sock_perm(op, label, flabel, file, request, + denied); done: rcu_read_unlock(); diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h index 620e81169659..ff4316e1068d 100644 --- a/security/apparmor/include/audit.h +++ b/security/apparmor/include/audit.h @@ -121,21 +121,29 @@ struct apparmor_audit_data { /* these entries require a custom callback fn */ struct { struct aa_label *peer; - struct { - const char *target; - kuid_t ouid; - } fs; + union { + struct { + kuid_t ouid; + const char *target; + } fs; + struct { + int type, protocol; + struct sock *peer_sk; + void *addr; + int addrlen; + } net; + int signal; + struct { + int rlim; + unsigned long max; + } rlim; + }; }; struct { struct aa_profile *profile; const char *ns; long pos; } iface; - int signal; - struct { - int rlim; - unsigned long max; - } rlim; struct { const char *src_name; const char *type; diff --git a/security/apparmor/include/net.h b/security/apparmor/include/net.h new file mode 100644 index 000000000000..140c8efcf364 --- /dev/null +++ b/security/apparmor/include/net.h @@ -0,0 +1,114 @@ +/* + * AppArmor security module + * + * This file contains AppArmor network mediation definitions. + * + * Copyright (C) 1998-2008 Novell/SUSE + * Copyright 2009-2017 Canonical Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2 of the + * License. + */ + +#ifndef __AA_NET_H +#define __AA_NET_H + +#include +#include + +#include "apparmorfs.h" +#include "label.h" +#include "perms.h" +#include "policy.h" + +#define AA_MAY_SEND AA_MAY_WRITE +#define AA_MAY_RECEIVE AA_MAY_READ + +#define AA_MAY_SHUTDOWN AA_MAY_DELETE + +#define AA_MAY_CONNECT AA_MAY_OPEN +#define AA_MAY_ACCEPT 0x00100000 + +#define AA_MAY_BIND 0x00200000 +#define AA_MAY_LISTEN 0x00400000 + +#define AA_MAY_SETOPT 0x01000000 +#define AA_MAY_GETOPT 0x02000000 + +#define NET_PERMS_MASK (AA_MAY_SEND | AA_MAY_RECEIVE | AA_MAY_CREATE | \ + AA_MAY_SHUTDOWN | AA_MAY_BIND | AA_MAY_LISTEN | \ + AA_MAY_CONNECT | AA_MAY_ACCEPT | AA_MAY_SETATTR | \ + AA_MAY_GETATTR | AA_MAY_SETOPT | AA_MAY_GETOPT) + +#define NET_FS_PERMS (AA_MAY_SEND | AA_MAY_RECEIVE | AA_MAY_CREATE | \ + AA_MAY_SHUTDOWN | AA_MAY_CONNECT | AA_MAY_RENAME |\ + AA_MAY_SETATTR | AA_MAY_GETATTR | AA_MAY_CHMOD | \ + AA_MAY_CHOWN | AA_MAY_CHGRP | AA_MAY_LOCK | \ + AA_MAY_MPROT) + +#define NET_PEER_MASK (AA_MAY_SEND | AA_MAY_RECEIVE | AA_MAY_CONNECT | \ + AA_MAY_ACCEPT) +struct aa_sk_ctx { + struct aa_label *label; + struct aa_label *peer; + struct path path; +}; + +#define SK_CTX(X) ((X)->sk_security) +#define SOCK_ctx(X) SOCK_INODE(X)->i_security +#define DEFINE_AUDIT_NET(NAME, OP, SK, F, T, P) \ + struct lsm_network_audit NAME ## _net = { .sk = (SK), \ + .family = (F)}; \ + DEFINE_AUDIT_DATA(NAME, \ + ((SK) && (F) != AF_UNIX) ? LSM_AUDIT_DATA_NET : \ + LSM_AUDIT_DATA_NONE, \ + OP); \ + NAME.u.net = &(NAME ## _net); \ + aad(&NAME)->net.type = (T); \ + aad(&NAME)->net.protocol = (P) + +#define DEFINE_AUDIT_SK(NAME, OP, SK) \ + DEFINE_AUDIT_NET(NAME, OP, SK, (SK)->sk_family, (SK)->sk_type, \ + (SK)->sk_protocol) + +/* struct aa_net - network confinement data + * @allow: basic network families permissions + * @audit: which network permissions to force audit + * @quiet: which network permissions to quiet rejects + */ +struct aa_net { + u16 allow[AF_MAX]; + u16 audit[AF_MAX]; + u16 quiet[AF_MAX]; +}; + + +extern struct aa_sfs_entry aa_sfs_entry_network[]; + +void audit_net_cb(struct audit_buffer *ab, void *va); +int aa_profile_af_perm(struct aa_profile *profile, struct common_audit_data *sa, + u32 request, u16 family, int type); +int aa_af_perm(struct aa_label *label, const char *op, u32 request, u16 family, + int type, int protocol); +static inline int aa_profile_af_sk_perm(struct aa_profile *profile, + struct common_audit_data *sa, + u32 request, + struct sock *sk) +{ + return aa_profile_af_perm(profile, sa, request, sk->sk_family, + sk->sk_type); +} +int aa_sk_perm(const char *op, u32 request, struct sock *sk); + +int aa_sock_file_perm(struct aa_label *label, const char *op, u32 request, + struct socket *sock); + + +static inline void aa_free_net_rules(struct aa_net *new) +{ + /* NOP */ +} + +#endif /* __AA_NET_H */ diff --git a/security/apparmor/include/perms.h b/security/apparmor/include/perms.h index 2b27bb79aec4..af04d5a7d73d 100644 --- a/security/apparmor/include/perms.h +++ b/security/apparmor/include/perms.h @@ -135,9 +135,10 @@ extern struct aa_perms allperms; void aa_perm_mask_to_str(char *str, const char *chrs, u32 mask); -void aa_audit_perm_names(struct audit_buffer *ab, const char **names, u32 mask); +void aa_audit_perm_names(struct audit_buffer *ab, const char * const *names, + u32 mask); void aa_audit_perm_mask(struct audit_buffer *ab, u32 mask, const char *chrs, - u32 chrsmask, const char **names, u32 namesmask); + u32 chrsmask, const char * const *names, u32 namesmask); void aa_apply_modes_to_perms(struct aa_profile *profile, struct aa_perms *perms); void aa_compute_perms(struct aa_dfa *dfa, unsigned int state, diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h index 17fe41a9cac3..4364088a0b9e 100644 --- a/security/apparmor/include/policy.h +++ b/security/apparmor/include/policy.h @@ -30,6 +30,7 @@ #include "file.h" #include "lib.h" #include "label.h" +#include "net.h" #include "perms.h" #include "resource.h" @@ -111,6 +112,7 @@ struct aa_data { * @policy: general match rules governing policy * @file: The set of rules governing basic file access and domain transitions * @caps: capabilities for the profile + * @net: network controls for the profile * @rlimits: rlimits for the profile * * @dents: dentries for the profiles file entries in apparmorfs @@ -148,6 +150,7 @@ struct aa_profile { struct aa_policydb policy; struct aa_file_rules file; struct aa_caps caps; + struct aa_net net; struct aa_rlimit rlimits; struct aa_loaddata *rawdata; @@ -220,6 +223,16 @@ static inline unsigned int PROFILE_MEDIATES_SAFE(struct aa_profile *profile, return 0; } +static inline unsigned int PROFILE_MEDIATES_AF(struct aa_profile *profile, + u16 AF) { + unsigned int state = PROFILE_MEDIATES(profile, AA_CLASS_NET); + u16 be_af = cpu_to_be16(AF); + + if (!state) + return 0; + return aa_dfa_match_len(profile->policy.dfa, state, (char *) &be_af, 2); +} + /** * aa_get_profile - increment refcount on profile @p * @p: profile (MAYBE NULL) diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c index 08ca26bcca77..8818621b5d95 100644 --- a/security/apparmor/lib.c +++ b/security/apparmor/lib.c @@ -211,7 +211,8 @@ void aa_perm_mask_to_str(char *str, const char *chrs, u32 mask) *str = '\0'; } -void aa_audit_perm_names(struct audit_buffer *ab, const char **names, u32 mask) +void aa_audit_perm_names(struct audit_buffer *ab, const char * const *names, + u32 mask) { const char *fmt = "%s"; unsigned int i, perm = 1; @@ -229,7 +230,7 @@ void aa_audit_perm_names(struct audit_buffer *ab, const char **names, u32 mask) } void aa_audit_perm_mask(struct audit_buffer *ab, u32 mask, const char *chrs, - u32 chrsmask, const char **names, u32 namesmask) + u32 chrsmask, const char * const *names, u32 namesmask) { char str[33]; diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 4ad0b3a45142..cc5ab23a2d84 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -33,6 +33,7 @@ #include "include/context.h" #include "include/file.h" #include "include/ipc.h" +#include "include/net.h" #include "include/path.h" #include "include/label.h" #include "include/policy.h" @@ -736,6 +737,368 @@ static int apparmor_task_kill(struct task_struct *target, struct siginfo *info, return error; } +/** + * apparmor_sk_alloc_security - allocate and attach the sk_security field + */ +static int apparmor_sk_alloc_security(struct sock *sk, int family, gfp_t flags) +{ + struct aa_sk_ctx *ctx; + + ctx = kzalloc(sizeof(*ctx), flags); + if (!ctx) + return -ENOMEM; + + SK_CTX(sk) = ctx; + + return 0; +} + +/** + * apparmor_sk_free_security - free the sk_security field + */ +static void apparmor_sk_free_security(struct sock *sk) +{ + struct aa_sk_ctx *ctx = SK_CTX(sk); + + SK_CTX(sk) = NULL; + aa_put_label(ctx->label); + aa_put_label(ctx->peer); + path_put(&ctx->path); + kfree(ctx); +} + +/** + * apparmor_clone_security - clone the sk_security field + */ +static void apparmor_sk_clone_security(const struct sock *sk, + struct sock *newsk) +{ + struct aa_sk_ctx *ctx = SK_CTX(sk); + struct aa_sk_ctx *new = SK_CTX(newsk); + + new->label = aa_get_label(ctx->label); + new->peer = aa_get_label(ctx->peer); + new->path = ctx->path; + path_get(&new->path); +} + +static int aa_sock_create_perm(struct aa_label *label, int family, int type, + int protocol) +{ + AA_BUG(!label); + AA_BUG(in_interrupt()); + + return aa_af_perm(label, OP_CREATE, AA_MAY_CREATE, family, type, + protocol); +} + + +/** + * apparmor_socket_create - check perms before creating a new socket + */ +static int apparmor_socket_create(int family, int type, int protocol, int kern) +{ + struct aa_label *label; + int error = 0; + + label = begin_current_label_crit_section(); + if (!(kern || unconfined(label))) + error = aa_sock_create_perm(label, family, type, protocol); + end_current_label_crit_section(label); + + return error; +} + +/** + * apparmor_socket_post_create - setup the per-socket security struct + * + * Note: + * - kernel sockets currently labeled unconfined but we may want to + * move to a special kernel label + * - socket may not have sk here if created with sock_create_lite or + * sock_alloc. These should be accept cases which will be handled in + * sock_graft. + */ +static int apparmor_socket_post_create(struct socket *sock, int family, + int type, int protocol, int kern) +{ + struct aa_label *label; + + if (kern) { + struct aa_ns *ns = aa_get_current_ns(); + + label = aa_get_label(ns_unconfined(ns)); + aa_put_ns(ns); + } else + label = aa_get_current_label(); + + if (sock->sk) { + struct aa_sk_ctx *ctx = SK_CTX(sock->sk); + + aa_put_label(ctx->label); + ctx->label = aa_get_label(label); + } + aa_put_label(label); + + return 0; +} + +/** + * apparmor_socket_bind - check perms before bind addr to socket + */ +static int apparmor_socket_bind(struct socket *sock, + struct sockaddr *address, int addrlen) +{ + AA_BUG(!sock); + AA_BUG(!sock->sk); + AA_BUG(!address); + AA_BUG(in_interrupt()); + + return aa_sk_perm(OP_BIND, AA_MAY_BIND, sock->sk); +} + +/** + * apparmor_socket_connect - check perms before connecting @sock to @address + */ +static int apparmor_socket_connect(struct socket *sock, + struct sockaddr *address, int addrlen) +{ + AA_BUG(!sock); + AA_BUG(!sock->sk); + AA_BUG(!address); + AA_BUG(in_interrupt()); + + return aa_sk_perm(OP_CONNECT, AA_MAY_CONNECT, sock->sk); +} + +/** + * apparmor_socket_list - check perms before allowing listen + */ +static int apparmor_socket_listen(struct socket *sock, int backlog) +{ + AA_BUG(!sock); + AA_BUG(!sock->sk); + AA_BUG(in_interrupt()); + + return aa_sk_perm(OP_LISTEN, AA_MAY_LISTEN, sock->sk); +} + +/** + * apparmor_socket_accept - check perms before accepting a new connection. + * + * Note: while @newsock is created and has some information, the accept + * has not been done. + */ +static int apparmor_socket_accept(struct socket *sock, struct socket *newsock) +{ + AA_BUG(!sock); + AA_BUG(!sock->sk); + AA_BUG(!newsock); + AA_BUG(in_interrupt()); + + return aa_sk_perm(OP_ACCEPT, AA_MAY_ACCEPT, sock->sk); +} + +static int aa_sock_msg_perm(const char *op, u32 request, struct socket *sock, + struct msghdr *msg, int size) +{ + AA_BUG(!sock); + AA_BUG(!sock->sk); + AA_BUG(!msg); + AA_BUG(in_interrupt()); + + return aa_sk_perm(op, request, sock->sk); +} + +/** + * apparmor_socket_sendmsg - check perms before sending msg to another socket + */ +static int apparmor_socket_sendmsg(struct socket *sock, + struct msghdr *msg, int size) +{ + return aa_sock_msg_perm(OP_SENDMSG, AA_MAY_SEND, sock, msg, size); +} + +/** + * apparmor_socket_recvmsg - check perms before receiving a message + */ +static int apparmor_socket_recvmsg(struct socket *sock, + struct msghdr *msg, int size, int flags) +{ + return aa_sock_msg_perm(OP_RECVMSG, AA_MAY_RECEIVE, sock, msg, size); +} + +/* revaliation, get/set attr, shutdown */ +static int aa_sock_perm(const char *op, u32 request, struct socket *sock) +{ + AA_BUG(!sock); + AA_BUG(!sock->sk); + AA_BUG(in_interrupt()); + + return aa_sk_perm(op, request, sock->sk); +} + +/** + * apparmor_socket_getsockname - check perms before getting the local address + */ +static int apparmor_socket_getsockname(struct socket *sock) +{ + return aa_sock_perm(OP_GETSOCKNAME, AA_MAY_GETATTR, sock); +} + +/** + * apparmor_socket_getpeername - check perms before getting remote address + */ +static int apparmor_socket_getpeername(struct socket *sock) +{ + return aa_sock_perm(OP_GETPEERNAME, AA_MAY_GETATTR, sock); +} + +/* revaliation, get/set attr, opt */ +static int aa_sock_opt_perm(const char *op, u32 request, struct socket *sock, + int level, int optname) +{ + AA_BUG(!sock); + AA_BUG(!sock->sk); + AA_BUG(in_interrupt()); + + return aa_sk_perm(op, request, sock->sk); +} + +/** + * apparmor_getsockopt - check perms before getting socket options + */ +static int apparmor_socket_getsockopt(struct socket *sock, int level, + int optname) +{ + return aa_sock_opt_perm(OP_GETSOCKOPT, AA_MAY_GETOPT, sock, + level, optname); +} + +/** + * apparmor_setsockopt - check perms before setting socket options + */ +static int apparmor_socket_setsockopt(struct socket *sock, int level, + int optname) +{ + return aa_sock_opt_perm(OP_SETSOCKOPT, AA_MAY_SETOPT, sock, + level, optname); +} + +/** + * apparmor_socket_shutdown - check perms before shutting down @sock conn + */ +static int apparmor_socket_shutdown(struct socket *sock, int how) +{ + return aa_sock_perm(OP_SHUTDOWN, AA_MAY_SHUTDOWN, sock); +} + +/** + * apparmor_socket_sock_recv_skb - check perms before associating skb to sk + * + * Note: can not sleep may be called with locks held + * + * dont want protocol specific in __skb_recv_datagram() + * to deny an incoming connection socket_sock_rcv_skb() + */ +static int apparmor_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + return 0; +} + + +static struct aa_label *sk_peer_label(struct sock *sk) +{ + struct aa_sk_ctx *ctx = SK_CTX(sk); + + if (ctx->peer) + return ctx->peer; + + return ERR_PTR(-ENOPROTOOPT); +} + +/** + * apparmor_socket_getpeersec_stream - get security context of peer + * + * Note: for tcp only valid if using ipsec or cipso on lan + */ +static int apparmor_socket_getpeersec_stream(struct socket *sock, + char __user *optval, + int __user *optlen, + unsigned int len) +{ + char *name; + int slen, error = 0; + struct aa_label *label; + struct aa_label *peer; + + label = begin_current_label_crit_section(); + peer = sk_peer_label(sock->sk); + if (IS_ERR(peer)) { + error = PTR_ERR(peer); + goto done; + } + slen = aa_label_asxprint(&name, labels_ns(label), peer, + FLAG_SHOW_MODE | FLAG_VIEW_SUBNS | + FLAG_HIDDEN_UNCONFINED, GFP_KERNEL); + /* don't include terminating \0 in slen, it breaks some apps */ + if (slen < 0) { + error = -ENOMEM; + } else { + if (slen > len) { + error = -ERANGE; + } else if (copy_to_user(optval, name, slen)) { + error = -EFAULT; + goto out; + } + if (put_user(slen, optlen)) + error = -EFAULT; +out: + kfree(name); + + } + +done: + end_current_label_crit_section(label); + + return error; +} + +/** + * apparmor_socket_getpeersec_dgram - get security label of packet + * @sock: the peer socket + * @skb: packet data + * @secid: pointer to where to put the secid of the packet + * + * Sets the netlabel socket state on sk from parent + */ +static int apparmor_socket_getpeersec_dgram(struct socket *sock, + struct sk_buff *skb, u32 *secid) + +{ + /* TODO: requires secid support */ + return -ENOPROTOOPT; +} + +/** + * apparmor_sock_graft - Initialize newly created socket + * @sk: child sock + * @parent: parent socket + * + * Note: could set off of SOCK_CTX(parent) but need to track inode and we can + * just set sk security information off of current creating process label + * Labeling of sk for accept case - probably should be sock based + * instead of task, because of the case where an implicitly labeled + * socket is shared by different tasks. + */ +static void apparmor_sock_graft(struct sock *sk, struct socket *parent) +{ + struct aa_sk_ctx *ctx = SK_CTX(sk); + + if (!ctx->label) + ctx->label = aa_get_current_label(); +} + static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(ptrace_access_check, apparmor_ptrace_access_check), LSM_HOOK_INIT(ptrace_traceme, apparmor_ptrace_traceme), @@ -770,6 +1133,30 @@ static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(getprocattr, apparmor_getprocattr), LSM_HOOK_INIT(setprocattr, apparmor_setprocattr), + LSM_HOOK_INIT(sk_alloc_security, apparmor_sk_alloc_security), + LSM_HOOK_INIT(sk_free_security, apparmor_sk_free_security), + LSM_HOOK_INIT(sk_clone_security, apparmor_sk_clone_security), + + LSM_HOOK_INIT(socket_create, apparmor_socket_create), + LSM_HOOK_INIT(socket_post_create, apparmor_socket_post_create), + LSM_HOOK_INIT(socket_bind, apparmor_socket_bind), + LSM_HOOK_INIT(socket_connect, apparmor_socket_connect), + LSM_HOOK_INIT(socket_listen, apparmor_socket_listen), + LSM_HOOK_INIT(socket_accept, apparmor_socket_accept), + LSM_HOOK_INIT(socket_sendmsg, apparmor_socket_sendmsg), + LSM_HOOK_INIT(socket_recvmsg, apparmor_socket_recvmsg), + LSM_HOOK_INIT(socket_getsockname, apparmor_socket_getsockname), + LSM_HOOK_INIT(socket_getpeername, apparmor_socket_getpeername), + LSM_HOOK_INIT(socket_getsockopt, apparmor_socket_getsockopt), + LSM_HOOK_INIT(socket_setsockopt, apparmor_socket_setsockopt), + LSM_HOOK_INIT(socket_shutdown, apparmor_socket_shutdown), + LSM_HOOK_INIT(socket_sock_rcv_skb, apparmor_socket_sock_rcv_skb), + LSM_HOOK_INIT(socket_getpeersec_stream, + apparmor_socket_getpeersec_stream), + LSM_HOOK_INIT(socket_getpeersec_dgram, + apparmor_socket_getpeersec_dgram), + LSM_HOOK_INIT(sock_graft, apparmor_sock_graft), + LSM_HOOK_INIT(cred_alloc_blank, apparmor_cred_alloc_blank), LSM_HOOK_INIT(cred_free, apparmor_cred_free), LSM_HOOK_INIT(cred_prepare, apparmor_cred_prepare), diff --git a/security/apparmor/net.c b/security/apparmor/net.c new file mode 100644 index 000000000000..33d54435f8d6 --- /dev/null +++ b/security/apparmor/net.c @@ -0,0 +1,184 @@ +/* + * AppArmor security module + * + * This file contains AppArmor network mediation + * + * Copyright (C) 1998-2008 Novell/SUSE + * Copyright 2009-2017 Canonical Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2 of the + * License. + */ + +#include "include/apparmor.h" +#include "include/audit.h" +#include "include/context.h" +#include "include/label.h" +#include "include/net.h" +#include "include/policy.h" + +#include "net_names.h" + + +struct aa_sfs_entry aa_sfs_entry_network[] = { + AA_SFS_FILE_STRING("af_mask", AA_SFS_AF_MASK), + { } +}; + +static const char * const net_mask_names[] = { + "unknown", + "send", + "receive", + "unknown", + + "create", + "shutdown", + "connect", + "unknown", + + "setattr", + "getattr", + "setcred", + "getcred", + + "chmod", + "chown", + "chgrp", + "lock", + + "mmap", + "mprot", + "unknown", + "unknown", + + "accept", + "bind", + "listen", + "unknown", + + "setopt", + "getopt", + "unknown", + "unknown", + + "unknown", + "unknown", + "unknown", + "unknown", +}; + + +/* audit callback for net specific fields */ +void audit_net_cb(struct audit_buffer *ab, void *va) +{ + struct common_audit_data *sa = va; + + audit_log_format(ab, " family="); + if (address_family_names[sa->u.net->family]) + audit_log_string(ab, address_family_names[sa->u.net->family]); + else + audit_log_format(ab, "\"unknown(%d)\"", sa->u.net->family); + audit_log_format(ab, " sock_type="); + if (sock_type_names[aad(sa)->net.type]) + audit_log_string(ab, sock_type_names[aad(sa)->net.type]); + else + audit_log_format(ab, "\"unknown(%d)\"", aad(sa)->net.type); + audit_log_format(ab, " protocol=%d", aad(sa)->net.protocol); + + if (aad(sa)->request & NET_PERMS_MASK) { + audit_log_format(ab, " requested_mask="); + aa_audit_perm_mask(ab, aad(sa)->request, NULL, 0, + net_mask_names, NET_PERMS_MASK); + + if (aad(sa)->denied & NET_PERMS_MASK) { + audit_log_format(ab, " denied_mask="); + aa_audit_perm_mask(ab, aad(sa)->denied, NULL, 0, + net_mask_names, NET_PERMS_MASK); + } + } + if (aad(sa)->peer) { + audit_log_format(ab, " peer="); + aa_label_xaudit(ab, labels_ns(aad(sa)->label), aad(sa)->peer, + FLAGS_NONE, GFP_ATOMIC); + } +} + + +/* Generic af perm */ +int aa_profile_af_perm(struct aa_profile *profile, struct common_audit_data *sa, + u32 request, u16 family, int type) +{ + struct aa_perms perms = { }; + + AA_BUG(family >= AF_MAX); + AA_BUG(type < 0 || type >= SOCK_MAX); + + if (profile_unconfined(profile)) + return 0; + + perms.allow = (profile->net.allow[family] & (1 << type)) ? + ALL_PERMS_MASK : 0; + perms.audit = (profile->net.audit[family] & (1 << type)) ? + ALL_PERMS_MASK : 0; + perms.quiet = (profile->net.quiet[family] & (1 << type)) ? + ALL_PERMS_MASK : 0; + aa_apply_modes_to_perms(profile, &perms); + + return aa_check_perms(profile, &perms, request, sa, audit_net_cb); +} + +int aa_af_perm(struct aa_label *label, const char *op, u32 request, u16 family, + int type, int protocol) +{ + struct aa_profile *profile; + DEFINE_AUDIT_NET(sa, op, NULL, family, type, protocol); + + return fn_for_each_confined(label, profile, + aa_profile_af_perm(profile, &sa, request, family, + type)); +} + +static int aa_label_sk_perm(struct aa_label *label, const char *op, u32 request, + struct sock *sk) +{ + struct aa_profile *profile; + DEFINE_AUDIT_SK(sa, op, sk); + + AA_BUG(!label); + AA_BUG(!sk); + + if (unconfined(label)) + return 0; + + return fn_for_each_confined(label, profile, + aa_profile_af_sk_perm(profile, &sa, request, sk)); +} + +int aa_sk_perm(const char *op, u32 request, struct sock *sk) +{ + struct aa_label *label; + int error; + + AA_BUG(!sk); + AA_BUG(in_interrupt()); + + /* TODO: switch to begin_current_label ???? */ + label = begin_current_label_crit_section(); + error = aa_label_sk_perm(label, op, request, sk); + end_current_label_crit_section(label); + + return error; +} + + +int aa_sock_file_perm(struct aa_label *label, const char *op, u32 request, + struct socket *sock) +{ + AA_BUG(!label); + AA_BUG(!sock); + AA_BUG(!sock->sk); + + return aa_label_sk_perm(label, op, request, sock->sk); +} diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 4ede87c30f8b..5a2aec358322 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -275,6 +275,19 @@ fail: return 0; } +static bool unpack_u16(struct aa_ext *e, u16 *data, const char *name) +{ + if (unpack_nameX(e, AA_U16, name)) { + if (!inbounds(e, sizeof(u16))) + return 0; + if (data) + *data = le16_to_cpu(get_unaligned((__le16 *) e->pos)); + e->pos += sizeof(u16); + return 1; + } + return 0; +} + static bool unpack_u32(struct aa_ext *e, u32 *data, const char *name) { if (unpack_nameX(e, AA_U32, name)) { @@ -584,7 +597,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) struct aa_profile *profile = NULL; const char *tmpname, *tmpns = NULL, *name = NULL; const char *info = "failed to unpack profile"; - size_t ns_len; + size_t size = 0, ns_len; struct rhashtable_params params = { 0 }; char *key = NULL; struct aa_data *data; @@ -717,6 +730,38 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) goto fail; } + size = unpack_array(e, "net_allowed_af"); + if (size) { + + for (i = 0; i < size; i++) { + /* discard extraneous rules that this kernel will + * never request + */ + if (i >= AF_MAX) { + u16 tmp; + + if (!unpack_u16(e, &tmp, NULL) || + !unpack_u16(e, &tmp, NULL) || + !unpack_u16(e, &tmp, NULL)) + goto fail; + continue; + } + if (!unpack_u16(e, &profile->net.allow[i], NULL)) + goto fail; + if (!unpack_u16(e, &profile->net.audit[i], NULL)) + goto fail; + if (!unpack_u16(e, &profile->net.quiet[i], NULL)) + goto fail; + } + if (!unpack_nameX(e, AA_ARRAYEND, NULL)) + goto fail; + } + if (VERSION_LT(e->version, v7)) { + /* pre v7 policy always allowed these */ + profile->net.allow[AF_UNIX] = 0xffff; + profile->net.allow[AF_NETLINK] = 0xffff; + } + if (unpack_nameX(e, AA_STRUCT, "policydb")) { /* generic policy dfa - optional and may be NULL */ info = "failed to unpack policydb"; From d07881d2edb0ab783846730629ac2faeaafdf4f1 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Wed, 16 Aug 2017 08:59:57 -0700 Subject: [PATCH 264/279] apparmor: move new_null_profile to after profile lookup fns() new_null_profile will need to use some of the profile lookup fns() so move instead of doing forward fn declarations. Signed-off-by: John Johansen --- security/apparmor/policy.c | 158 ++++++++++++++++++------------------- 1 file changed, 79 insertions(+), 79 deletions(-) diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index 244ea4a4a8f0..a81a384a63b1 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -289,85 +289,6 @@ fail: return NULL; } -/** - * aa_new_null_profile - create or find a null-X learning profile - * @parent: profile that caused this profile to be created (NOT NULL) - * @hat: true if the null- learning profile is a hat - * @base: name to base the null profile off of - * @gfp: type of allocation - * - * Find/Create a null- complain mode profile used in learning mode. The - * name of the profile is unique and follows the format of parent//null-XXX. - * where XXX is based on the @name or if that fails or is not supplied - * a unique number - * - * null profiles are added to the profile list but the list does not - * hold a count on them so that they are automatically released when - * not in use. - * - * Returns: new refcounted profile else NULL on failure - */ -struct aa_profile *aa_new_null_profile(struct aa_profile *parent, bool hat, - const char *base, gfp_t gfp) -{ - struct aa_profile *profile; - char *name; - - AA_BUG(!parent); - - if (base) { - name = kmalloc(strlen(parent->base.hname) + 8 + strlen(base), - gfp); - if (name) { - sprintf(name, "%s//null-%s", parent->base.hname, base); - goto name; - } - /* fall through to try shorter uniq */ - } - - name = kmalloc(strlen(parent->base.hname) + 2 + 7 + 8, gfp); - if (!name) - return NULL; - sprintf(name, "%s//null-%x", parent->base.hname, - atomic_inc_return(&parent->ns->uniq_null)); - -name: - /* lookup to see if this is a dup creation */ - profile = aa_find_child(parent, basename(name)); - if (profile) - goto out; - - profile = aa_alloc_profile(name, NULL, gfp); - if (!profile) - goto fail; - - profile->mode = APPARMOR_COMPLAIN; - profile->label.flags |= FLAG_NULL; - if (hat) - profile->label.flags |= FLAG_HAT; - profile->path_flags = parent->path_flags; - - /* released on free_profile */ - rcu_assign_pointer(profile->parent, aa_get_profile(parent)); - profile->ns = aa_get_ns(parent->ns); - profile->file.dfa = aa_get_dfa(nulldfa); - profile->policy.dfa = aa_get_dfa(nulldfa); - - mutex_lock(&profile->ns->lock); - __add_profile(&parent->base.profiles, profile); - mutex_unlock(&profile->ns->lock); - - /* refcount released by caller */ -out: - kfree(name); - - return profile; - -fail: - aa_free_profile(profile); - return NULL; -} - /* TODO: profile accounting - setup in remove */ /** @@ -558,6 +479,85 @@ struct aa_profile *aa_fqlookupn_profile(struct aa_label *base, return profile; } +/** + * aa_new_null_profile - create or find a null-X learning profile + * @parent: profile that caused this profile to be created (NOT NULL) + * @hat: true if the null- learning profile is a hat + * @base: name to base the null profile off of + * @gfp: type of allocation + * + * Find/Create a null- complain mode profile used in learning mode. The + * name of the profile is unique and follows the format of parent//null-XXX. + * where XXX is based on the @name or if that fails or is not supplied + * a unique number + * + * null profiles are added to the profile list but the list does not + * hold a count on them so that they are automatically released when + * not in use. + * + * Returns: new refcounted profile else NULL on failure + */ +struct aa_profile *aa_new_null_profile(struct aa_profile *parent, bool hat, + const char *base, gfp_t gfp) +{ + struct aa_profile *profile; + char *name; + + AA_BUG(!parent); + + if (base) { + name = kmalloc(strlen(parent->base.hname) + 8 + strlen(base), + gfp); + if (name) { + sprintf(name, "%s//null-%s", parent->base.hname, base); + goto name; + } + /* fall through to try shorter uniq */ + } + + name = kmalloc(strlen(parent->base.hname) + 2 + 7 + 8, gfp); + if (!name) + return NULL; + sprintf(name, "%s//null-%x", parent->base.hname, + atomic_inc_return(&parent->ns->uniq_null)); + +name: + /* lookup to see if this is a dup creation */ + profile = aa_find_child(parent, basename(name)); + if (profile) + goto out; + + profile = aa_alloc_profile(name, NULL, gfp); + if (!profile) + goto fail; + + profile->mode = APPARMOR_COMPLAIN; + profile->label.flags |= FLAG_NULL; + if (hat) + profile->label.flags |= FLAG_HAT; + profile->path_flags = parent->path_flags; + + /* released on free_profile */ + rcu_assign_pointer(profile->parent, aa_get_profile(parent)); + profile->ns = aa_get_ns(parent->ns); + profile->file.dfa = aa_get_dfa(nulldfa); + profile->policy.dfa = aa_get_dfa(nulldfa); + + mutex_lock(&profile->ns->lock); + __add_profile(&parent->base.profiles, profile); + mutex_unlock(&profile->ns->lock); + + /* refcount released by caller */ +out: + kfree(name); + + return profile; + +fail: + aa_free_profile(profile); + return NULL; +} + /** * replacement_allowed - test to see if replacement is allowed * @profile: profile to test if it can be replaced (MAYBE NULL) From 290638a52a808d658bd04b746b3ca46886c157e0 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Wed, 16 Aug 2017 05:40:49 -0700 Subject: [PATCH 265/279] apparmor: fix race condition in null profile creation There is a race when null- profile is being created between the initial lookup/creation of the profile and lock/addition of the profile. This could result in multiple version of a profile being added to the list which need to be removed/replaced. Since these are learning profile their is no affect on mediation. Signed-off-by: John Johansen --- security/apparmor/policy.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index a81a384a63b1..4243b0c3f0e4 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -500,7 +500,8 @@ struct aa_profile *aa_fqlookupn_profile(struct aa_label *base, struct aa_profile *aa_new_null_profile(struct aa_profile *parent, bool hat, const char *base, gfp_t gfp) { - struct aa_profile *profile; + struct aa_profile *p, *profile; + const char *bname; char *name; AA_BUG(!parent); @@ -523,7 +524,8 @@ struct aa_profile *aa_new_null_profile(struct aa_profile *parent, bool hat, name: /* lookup to see if this is a dup creation */ - profile = aa_find_child(parent, basename(name)); + bname = basename(name); + profile = aa_find_child(parent, bname); if (profile) goto out; @@ -544,7 +546,13 @@ name: profile->policy.dfa = aa_get_dfa(nulldfa); mutex_lock(&profile->ns->lock); - __add_profile(&parent->base.profiles, profile); + p = __find_child(&parent->base.profiles, bname); + if (p) { + aa_free_profile(profile); + profile = aa_get_profile(p); + } else { + __add_profile(&parent->base.profiles, profile); + } mutex_unlock(&profile->ns->lock); /* refcount released by caller */ From 15372b97aa7593c6f5bc1afe69f42fd403c40685 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Wed, 16 Aug 2017 05:48:06 -0700 Subject: [PATCH 266/279] apparmor: ensure unconfined profiles have dfas initialized Generally unconfined has early bailout tests and does not need the dfas initialized, however if an early bailout test is ever missed it will result in an oops. Be defensive and initialize the unconfined profile to have null dfas (no permission) so if an early bailout test is missed we fail closed (no perms granted) instead of oopsing. Signed-off-by: John Johansen --- security/apparmor/policy_ns.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/security/apparmor/policy_ns.c b/security/apparmor/policy_ns.c index 351d3bab3a3d..62a3589c62ab 100644 --- a/security/apparmor/policy_ns.c +++ b/security/apparmor/policy_ns.c @@ -112,6 +112,8 @@ static struct aa_ns *alloc_ns(const char *prefix, const char *name) ns->unconfined->label.flags |= FLAG_IX_ON_NAME_ERROR | FLAG_IMMUTIBLE | FLAG_NS_COUNT | FLAG_UNCONFINED; ns->unconfined->mode = APPARMOR_UNCONFINED; + ns->unconfined->file.dfa = aa_get_dfa(nulldfa); + ns->unconfined->policy.dfa = aa_get_dfa(nulldfa); /* ns and ns->unconfined share ns->unconfined refcount */ ns->unconfined->ns = ns; From bc4d82fb946e7b471eab2a80e384227c4eb15652 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Wed, 16 Aug 2017 09:33:48 -0700 Subject: [PATCH 267/279] apparmor: fix incorrect type assignment when freeing proxies sparse reports poisoning the proxy->label before freeing the struct is resulting in a sparse build warning. ../security/apparmor/label.c:52:30: warning: incorrect type in assignment (different address spaces) ../security/apparmor/label.c:52:30: expected struct aa_label [noderef] *label ../security/apparmor/label.c:52:30: got struct aa_label * fix with RCU_INIT_POINTER as this is one of those cases where rcu_assign_pointer() is not needed. Signed-off-by: John Johansen --- security/apparmor/label.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/apparmor/label.c b/security/apparmor/label.c index 52b4ef14840d..c5b99b954580 100644 --- a/security/apparmor/label.c +++ b/security/apparmor/label.c @@ -49,7 +49,7 @@ static void free_proxy(struct aa_proxy *proxy) /* p->label will not updated any more as p is dead */ aa_put_label(rcu_dereference_protected(proxy->label, true)); memset(proxy, 0, sizeof(*proxy)); - proxy->label = (struct aa_label *) PROXY_POISON; + RCU_INIT_POINTER(proxy->label, (struct aa_label *)PROXY_POISON); kfree(proxy); } } From b1545dba092ba543eab1f7b5ed757a4988e267c8 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Wed, 23 Aug 2017 12:10:39 -0700 Subject: [PATCH 268/279] apparmor: fix build failure on sparc caused by undeclared signals In file included from security/apparmor/ipc.c:23:0: security/apparmor/include/sig_names.h:26:3: error: 'SIGSTKFLT' undeclared here (not in a function) [SIGSTKFLT] = 16, /* -, 16, - */ ^ security/apparmor/include/sig_names.h:26:3: error: array index in initializer not of integer type security/apparmor/include/sig_names.h:26:3: note: (near initialization for 'sig_map') security/apparmor/include/sig_names.h:51:3: error: 'SIGUNUSED' undeclared here (not in a function) [SIGUNUSED] = 34, /* -, 31, - */ ^ security/apparmor/include/sig_names.h:51:3: error: array index in initializer not of integer type security/apparmor/include/sig_names.h:51:3: note: (near initialization for 'sig_map') Reported-by: Stephen Rothwell Fixes: c6bf1adaecaa ("apparmor: add the ability to mediate signals") Signed-off-by: John Johansen --- security/apparmor/include/sig_names.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/security/apparmor/include/sig_names.h b/security/apparmor/include/sig_names.h index 0d4395f231ca..92e62fe95292 100644 --- a/security/apparmor/include/sig_names.h +++ b/security/apparmor/include/sig_names.h @@ -23,7 +23,9 @@ static const int sig_map[MAXMAPPED_SIG] = { [SIGPIPE] = 13, [SIGALRM] = 14, [SIGTERM] = 15, +#ifdef SIGSTKFLT [SIGSTKFLT] = 16, /* -, 16, - */ +#endif [SIGCHLD] = 17, /* 20, 17, 18. SIGCHLD -, -, 18 */ [SIGCONT] = 18, /* 19, 18, 25 */ [SIGSTOP] = 19, /* 17, 19, 23 */ @@ -47,7 +49,8 @@ static const int sig_map[MAXMAPPED_SIG] = { #if defined(SIGLOST) && SIGPWR != SIGLOST /* sparc */ [SIGLOST] = 33, /* unused on Linux */ #endif -#if defined(SIGLOST) && defined(SIGSYS) && SIGLOST != SIGSYS +#if defined(SIGUNUSED) && \ + defined(SIGLOST) && defined(SIGSYS) && SIGLOST != SIGSYS [SIGUNUSED] = 34, /* -, 31, - */ #endif }; From bf81100f63db7ea243d17b9d5008ba3af2fdf6b2 Mon Sep 17 00:00:00 2001 From: John Johansen Date: Thu, 31 Aug 2017 09:54:43 -0700 Subject: [PATCH 269/279] apparmor: fix apparmorfs DAC access permissions The DAC access permissions for several apparmorfs files are wrong. .access - needs to be writable by all tasks to perform queries the others in the set only provide a read fn so should be read only. With policy namespace virtualization all apparmor needs to control the permission and visibility checks directly which means DAC access has to be allowed for all user, group, and other. BugLink: http://bugs.launchpad.net/bugs/1713103 Fixes: c97204baf840b ("apparmor: rename apparmor file fns and data to indicate use") Signed-off-by: John Johansen --- security/apparmor/apparmorfs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 125dad5c3fde..518d5928661b 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -2215,12 +2215,12 @@ static struct aa_sfs_entry aa_sfs_entry_features[] = { }; static struct aa_sfs_entry aa_sfs_entry_apparmor[] = { - AA_SFS_FILE_FOPS(".access", 0640, &aa_sfs_access), + AA_SFS_FILE_FOPS(".access", 0666, &aa_sfs_access), AA_SFS_FILE_FOPS(".stacked", 0444, &seq_ns_stacked_fops), AA_SFS_FILE_FOPS(".ns_stacked", 0444, &seq_ns_nsstacked_fops), - AA_SFS_FILE_FOPS(".ns_level", 0666, &seq_ns_level_fops), - AA_SFS_FILE_FOPS(".ns_name", 0640, &seq_ns_name_fops), - AA_SFS_FILE_FOPS("profiles", 0440, &aa_sfs_profiles_fops), + AA_SFS_FILE_FOPS(".ns_level", 0444, &seq_ns_level_fops), + AA_SFS_FILE_FOPS(".ns_name", 0444, &seq_ns_name_fops), + AA_SFS_FILE_FOPS("profiles", 0444, &aa_sfs_profiles_fops), AA_SFS_DIR("features", aa_sfs_entry_features), { } }; From af21b01d1166248f282fc02d0f459c94de06615e Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 22 Sep 2017 22:24:02 +0200 Subject: [PATCH 270/279] parisc: Reintroduce option to gzip-compress the kernel By adding the feature to build the kernel as self-extracting executeable, the possibility to simply compress the kernel with gzip was lost. This patch now reintroduces this possibilty again and leaves it up to the user to decide how the kernel should be built. The palo bootloader is able to natively load both formats. Signed-off-by: Helge Deller --- arch/parisc/Kconfig | 12 ++++++++++++ arch/parisc/Makefile | 5 +++++ 2 files changed, 17 insertions(+) diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index ba7b7ddc3844..a57dedbfc7b7 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -257,6 +257,18 @@ config PARISC_PAGE_SIZE_64KB endchoice +config PARISC_SELF_EXTRACT + bool "Build kernel as self-extracting executable" + default y + help + Say Y if you want to build the parisc kernel as a kind of + self-extracting executable. + + If you say N here, the kernel will be compressed with gzip + which can be loaded by the palo bootloader directly too. + + If you don't know what to do here, say Y. + config SMP bool "Symmetric multi-processing support" ---help--- diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index 58fae5d2449d..01946ebaff72 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -129,8 +129,13 @@ Image: vmlinux bzImage: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ +ifdef CONFIG_PARISC_SELF_EXTRACT vmlinuz: bzImage $(OBJCOPY) $(boot)/bzImage $@ +else +vmlinuz: vmlinux + @gzip -cf -9 $< > $@ +endif install: $(CONFIG_SHELL) $(src)/arch/parisc/install.sh \ From 8c031ba63f8f2a9efc471cb45b2ff18271556544 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 22 Sep 2017 21:57:11 +0200 Subject: [PATCH 271/279] parisc: Unbreak bootloader due to gcc-7 optimizations gcc-7 optimizes the byte-wise accesses of get_unaligned_le32() into word-wise accesses if the 32-bit integer output_len is declared as external. This panics then the bootloader since we don't have the unaligned access fault trap handler installed during boot time. Avoid this optimization by declaring output_len as byte-aligned and thus unbreak the bootloader code. Additionally, compile the boot code optimized for size. Signed-off-by: Helge Deller --- arch/parisc/boot/compressed/Makefile | 2 +- arch/parisc/boot/compressed/misc.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/parisc/boot/compressed/Makefile b/arch/parisc/boot/compressed/Makefile index 5450a11c9d10..7d7e594bda36 100644 --- a/arch/parisc/boot/compressed/Makefile +++ b/arch/parisc/boot/compressed/Makefile @@ -15,7 +15,7 @@ targets += misc.o piggy.o sizes.h head.o real2.o firmware.o KBUILD_CFLAGS := -D__KERNEL__ -O2 -DBOOTLOADER KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks -KBUILD_CFLAGS += -fno-PIE -mno-space-regs -mdisable-fpregs +KBUILD_CFLAGS += -fno-PIE -mno-space-regs -mdisable-fpregs -Os ifndef CONFIG_64BIT KBUILD_CFLAGS += -mfast-indirect-calls endif diff --git a/arch/parisc/boot/compressed/misc.c b/arch/parisc/boot/compressed/misc.c index 13a4bf9ac4da..9345b44b86f0 100644 --- a/arch/parisc/boot/compressed/misc.c +++ b/arch/parisc/boot/compressed/misc.c @@ -24,7 +24,8 @@ /* Symbols defined by linker scripts */ extern char input_data[]; extern int input_len; -extern __le32 output_len; /* at unaligned address, little-endian */ +/* output_len is inserted by the linker possibly at an unaligned address */ +extern __le32 output_len __aligned(1); extern char _text, _end; extern char _bss, _ebss; extern char _startcode_end; From c17c02040bf0d186cebd3e66ff349f955575bf38 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Fri, 22 Sep 2017 09:42:42 +0200 Subject: [PATCH 272/279] arch: remove unused *_segments() macros/functions Some architectures define the no-op macros/functions copy_segments, release_segments and forget_segments. These are used nowhere in the tree, so removed them. Signed-off-by: Tobias Klauser Acked-by: Vineet Gupta [for arch/arc] Signed-off-by: Linus Torvalds --- arch/arc/include/asm/processor.h | 3 --- arch/c6x/include/asm/processor.h | 3 --- arch/frv/include/asm/processor.h | 4 ---- arch/m32r/include/asm/processor.h | 8 -------- arch/metag/include/asm/processor.h | 3 --- arch/mn10300/kernel/process.c | 12 ------------ arch/sh/include/asm/processor_32.h | 4 ---- arch/sh/include/asm/processor_64.h | 4 ---- arch/um/include/asm/processor-generic.h | 5 ----- arch/xtensa/include/asm/processor.h | 5 ----- 10 files changed, 51 deletions(-) diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h index d400a2161935..8ee41e988169 100644 --- a/arch/arc/include/asm/processor.h +++ b/arch/arc/include/asm/processor.h @@ -78,9 +78,6 @@ struct task_struct; #endif -#define copy_segments(tsk, mm) do { } while (0) -#define release_segments(mm) do { } while (0) - #define KSTK_EIP(tsk) (task_pt_regs(tsk)->ret) #define KSTK_ESP(tsk) (task_pt_regs(tsk)->sp) diff --git a/arch/c6x/include/asm/processor.h b/arch/c6x/include/asm/processor.h index 7c87b5be53b5..8f7cce829f8e 100644 --- a/arch/c6x/include/asm/processor.h +++ b/arch/c6x/include/asm/processor.h @@ -92,9 +92,6 @@ static inline void release_thread(struct task_struct *dead_task) { } -#define copy_segments(tsk, mm) do { } while (0) -#define release_segments(mm) do { } while (0) - /* * saved kernel SP and DP of a blocked thread. */ diff --git a/arch/frv/include/asm/processor.h b/arch/frv/include/asm/processor.h index e4d08d74ed9f..021cce78b401 100644 --- a/arch/frv/include/asm/processor.h +++ b/arch/frv/include/asm/processor.h @@ -92,10 +92,6 @@ static inline void release_thread(struct task_struct *dead_task) extern asmlinkage void save_user_regs(struct user_context *target); extern asmlinkage void *restore_user_regs(const struct user_context *target, ...); -#define copy_segments(tsk, mm) do { } while (0) -#define release_segments(mm) do { } while (0) -#define forget_segments() do { } while (0) - unsigned long get_wchan(struct task_struct *p); #define KSTK_EIP(tsk) ((tsk)->thread.frame0->pc) diff --git a/arch/m32r/include/asm/processor.h b/arch/m32r/include/asm/processor.h index 657874eeeccc..c70fa9ac7169 100644 --- a/arch/m32r/include/asm/processor.h +++ b/arch/m32r/include/asm/processor.h @@ -118,14 +118,6 @@ struct mm_struct; /* Free all resources held by a thread. */ extern void release_thread(struct task_struct *); -/* Copy and release all segment info associated with a VM */ -extern void copy_segments(struct task_struct *p, struct mm_struct * mm); -extern void release_segments(struct mm_struct * mm); - -/* Copy and release all segment info associated with a VM */ -#define copy_segments(p, mm) do { } while (0) -#define release_segments(mm) do { } while (0) - unsigned long get_wchan(struct task_struct *p); #define KSTK_EIP(tsk) ((tsk)->thread.lr) #define KSTK_ESP(tsk) ((tsk)->thread.sp) diff --git a/arch/metag/include/asm/processor.h b/arch/metag/include/asm/processor.h index ec6a49076980..8ae92d6abfd2 100644 --- a/arch/metag/include/asm/processor.h +++ b/arch/metag/include/asm/processor.h @@ -131,9 +131,6 @@ static inline void release_thread(struct task_struct *dead_task) { } -#define copy_segments(tsk, mm) do { } while (0) -#define release_segments(mm) do { } while (0) - /* * Return saved PC of a blocked thread. */ diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c index 89e8027e07fb..7c475fd99c46 100644 --- a/arch/mn10300/kernel/process.c +++ b/arch/mn10300/kernel/process.c @@ -59,10 +59,6 @@ void arch_cpu_idle(void) } #endif -void release_segments(struct mm_struct *mm) -{ -} - void machine_restart(char *cmd) { #ifdef CONFIG_KERNEL_DEBUGGER @@ -112,14 +108,6 @@ void release_thread(struct task_struct *dead_task) { } -/* - * we do not have to muck with descriptors here, that is - * done in switch_mm() as needed. - */ -void copy_segments(struct task_struct *p, struct mm_struct *new_mm) -{ -} - /* * this gets called so that we can store lazy state into memory and copy the * current task into the new thread. diff --git a/arch/sh/include/asm/processor_32.h b/arch/sh/include/asm/processor_32.h index 18e0377f72bb..88ce1e22237b 100644 --- a/arch/sh/include/asm/processor_32.h +++ b/arch/sh/include/asm/processor_32.h @@ -136,10 +136,6 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_pc, unsigned lo /* Free all resources held by a thread. */ extern void release_thread(struct task_struct *); -/* Copy and release all segment info associated with a VM */ -#define copy_segments(p, mm) do { } while(0) -#define release_segments(mm) do { } while(0) - /* * FPU lazy state save handling. */ diff --git a/arch/sh/include/asm/processor_64.h b/arch/sh/include/asm/processor_64.h index eedd4f625d07..777a16318aff 100644 --- a/arch/sh/include/asm/processor_64.h +++ b/arch/sh/include/asm/processor_64.h @@ -170,10 +170,6 @@ struct mm_struct; /* Free all resources held by a thread. */ extern void release_thread(struct task_struct *); -/* Copy and release all segment info associated with a VM */ -#define copy_segments(p, mm) do { } while (0) -#define release_segments(mm) do { } while (0) -#define forget_segments() do { } while (0) /* * FPU lazy state save handling. */ diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h index f6d1a3f747a9..86942a492454 100644 --- a/arch/um/include/asm/processor-generic.h +++ b/arch/um/include/asm/processor-generic.h @@ -58,11 +58,6 @@ static inline void release_thread(struct task_struct *task) { } -static inline void mm_copy_segments(struct mm_struct *from_mm, - struct mm_struct *new_mm) -{ -} - #define init_stack (init_thread_union.stack) /* diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index 30ee8c608853..5b0027d4ecc0 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -208,11 +208,6 @@ struct mm_struct; /* Free all resources held by a thread. */ #define release_thread(thread) do { } while(0) -/* Copy and release all segment info associated with a VM */ -#define copy_segments(p, mm) do { } while(0) -#define release_segments(mm) do { } while(0) -#define forget_segments() do { } while (0) - extern unsigned long get_wchan(struct task_struct *p); #define KSTK_EIP(tsk) (task_pt_regs(tsk)->pc) From 6e70e26dc52be62c1f39f81b5f71fa5e643677aa Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 21 Sep 2017 21:32:29 -0500 Subject: [PATCH 273/279] SMB3: handle new statx fields We weren't returning the creation time or the two easily supported attributes (ENCRYPTED or COMPRESSED) for the getattr call to allow statx to return these fields. Signed-off-by: Steve French Reviewed-by: Ronnie Sahlberg \ Acked-by: Jeff Layton CC: Stable Reviewed-by: Pavel Shilovsky --- fs/cifs/inode.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index a8693632235f..7c732cb44164 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -234,6 +234,8 @@ cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info, fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime); fattr->cf_mtime = cifs_NTtimeToUnix(info->LastModificationTime); fattr->cf_ctime = cifs_NTtimeToUnix(info->LastStatusChange); + /* old POSIX extensions don't get create time */ + fattr->cf_mode = le64_to_cpu(info->Permissions); /* @@ -2024,6 +2026,19 @@ int cifs_getattr(const struct path *path, struct kstat *stat, stat->blksize = CIFS_MAX_MSGSIZE; stat->ino = CIFS_I(inode)->uniqueid; + /* old CIFS Unix Extensions doesn't return create time */ + if (CIFS_I(inode)->createtime) { + stat->result_mask |= STATX_BTIME; + stat->btime = + cifs_NTtimeToUnix(cpu_to_le64(CIFS_I(inode)->createtime)); + } + + stat->attributes_mask |= (STATX_ATTR_COMPRESSED | STATX_ATTR_ENCRYPTED); + if (CIFS_I(inode)->cifsAttrs & FILE_ATTRIBUTE_COMPRESSED) + stat->attributes |= STATX_ATTR_COMPRESSED; + if (CIFS_I(inode)->cifsAttrs & FILE_ATTRIBUTE_ENCRYPTED) + stat->attributes |= STATX_ATTR_ENCRYPTED; + /* * If on a multiuser mount without unix extensions or cifsacl being * enabled, and the admin hasn't overridden them, set the ownership From 1013e760d10e614dc10b5624ce9fc41563ba2e65 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 22 Sep 2017 01:40:27 -0500 Subject: [PATCH 274/279] SMB3: Don't ignore O_SYNC/O_DSYNC and O_DIRECT flags Signed-off-by: Steve French CC: Stable Reviewed-by: Ronnie Sahlberg Reviewed-by: Pavel Shilovsky --- fs/cifs/file.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 822311919163..92fdf9c35de2 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -224,6 +224,13 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, if (backup_cred(cifs_sb)) create_options |= CREATE_OPEN_BACKUP_INTENT; + /* O_SYNC also has bit for O_DSYNC so following check picks up either */ + if (f_flags & O_SYNC) + create_options |= CREATE_WRITE_THROUGH; + + if (f_flags & O_DIRECT) + create_options |= CREATE_NO_BUFFER; + oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = desired_access; From b9b95da92de9d498ece7e6a82e2d6dcfc76fd9d8 Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Fri, 22 Sep 2017 14:28:46 +0200 Subject: [PATCH 275/279] MAINTAINERS: update git tree locations for ieee802154 subsystem Patches for ieee802154 will go through my new trees towards netdev from now on. The 6LoWPAN subsystem will stay as is (shared between ieee802154 and bluetooth) and go through the bluetooth tree as usual. Signed-off-by: Stefan Schmidt Signed-off-by: David S. Miller --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 955f034fd523..61ee134cf6a8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6642,8 +6642,8 @@ M: Alexander Aring M: Stefan Schmidt L: linux-wpan@vger.kernel.org W: http://wpan.cakelab.org/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth.git -T: git git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth-next.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/sschmidt/wpan.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/sschmidt/wpan-next.git S: Maintained F: net/ieee802154/ F: net/mac802154/ From 581fe0ea61584d88072527ae9fb9dcb9d1f2783e Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 22 Sep 2017 19:42:37 -0400 Subject: [PATCH 276/279] net: orphan frags on stand-alone ptype in dev_queue_xmit_nit Zerocopy skbs frags are copied when the skb is looped to a local sock. Commit 1080e512d44d ("net: orphan frags on receive") introduced calls to skb_orphan_frags to deliver_skb and __netif_receive_skb for this. With msg_zerocopy, these skbs can also exist in the tx path and thus loop from dev_queue_xmit_nit. This already calls deliver_skb in its loop. But it does not orphan before a separate pt_prev->func(). Add the missing skb_orphan_frags_rx. Changes v1->v2: handle skb_orphan_frags_rx failure Fixes: 1f8b977ab32d ("sock: enable MSG_ZEROCOPY") Signed-off-by: Willem de Bruijn Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 9a2254f9802f..588b473194a8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1948,8 +1948,12 @@ again: goto again; } out_unlock: - if (pt_prev) - pt_prev->func(skb2, skb->dev, pt_prev, skb->dev); + if (pt_prev) { + if (!skb_orphan_frags_rx(skb2, GFP_ATOMIC)) + pt_prev->func(skb2, skb->dev, pt_prev, skb->dev); + else + kfree_skb(skb2); + } rcu_read_unlock(); } EXPORT_SYMBOL_GPL(dev_queue_xmit_nit); From cbb2fb5c72f48d3029c144be0f0e61da1c7bccf7 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 22 Sep 2017 20:20:06 -0400 Subject: [PATCH 277/279] net: set tb->fast_sk_family We need to set the tb->fast_sk_family properly so we can use the proper comparison function for all subsequent reuseport bind requests. Fixes: 637bc8bbe6c0 ("inet: reset tb->fastreuseport when adding a reuseport sk") Reported-and-tested-by: Cole Robinson Signed-off-by: Josef Bacik Signed-off-by: David S. Miller --- net/ipv4/inet_connection_sock.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index b9c64b40a83a..f87f4805e244 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -328,6 +328,7 @@ success: tb->fastuid = uid; tb->fast_rcv_saddr = sk->sk_rcv_saddr; tb->fast_ipv6_only = ipv6_only_sock(sk); + tb->fast_sk_family = sk->sk_family; #if IS_ENABLED(CONFIG_IPV6) tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr; #endif @@ -354,6 +355,7 @@ success: tb->fastuid = uid; tb->fast_rcv_saddr = sk->sk_rcv_saddr; tb->fast_ipv6_only = ipv6_only_sock(sk); + tb->fast_sk_family = sk->sk_family; #if IS_ENABLED(CONFIG_IPV6) tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr; #endif From 7a56673b58f2414679e926bba80309a037a4fd35 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 22 Sep 2017 20:20:07 -0400 Subject: [PATCH 278/279] net: use inet6_rcv_saddr to compare sockets In ipv6_rcv_saddr_equal() we need to use inet6_rcv_saddr(sk) for the ipv6 compare with the fast socket information to make sure we're doing the proper comparisons. Fixes: 637bc8bbe6c0 ("inet: reset tb->fastreuseport when adding a reuseport sk") Reported-and-tested-by: Cole Robinson Signed-off-by: Josef Bacik Signed-off-by: David S. Miller --- net/ipv4/inet_connection_sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index f87f4805e244..a1bf30438bc5 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -266,7 +266,7 @@ static inline int sk_reuseport_match(struct inet_bind_bucket *tb, #if IS_ENABLED(CONFIG_IPV6) if (tb->fast_sk_family == AF_INET6) return ipv6_rcv_saddr_equal(&tb->fast_v6_rcv_saddr, - &sk->sk_v6_rcv_saddr, + inet6_rcv_saddr(sk), tb->fast_rcv_saddr, sk->sk_rcv_saddr, tb->fast_ipv6_only, From fbed24bcc69d3e48c5402c371f19f5c7688871e5 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 22 Sep 2017 20:20:08 -0400 Subject: [PATCH 279/279] inet: fix improper empty comparison When doing my reuseport rework I screwed up and changed a if (hlist_empty(&tb->owners)) to if (!hlist_empty(&tb->owners)) This is obviously bad as all of the reuseport/reuse logic was reversed, which caused weird problems like allowing an ipv4 bind conflict if we opened an ipv4 only socket on a port followed by an ipv6 only socket on the same port. Fixes: b9470c27607b ("inet: kill smallest_size and smallest_port") Reported-by: Cole Robinson Signed-off-by: Josef Bacik Signed-off-by: David S. Miller --- net/ipv4/inet_connection_sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index a1bf30438bc5..c039c937ba90 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -321,7 +321,7 @@ tb_found: goto fail_unlock; } success: - if (!hlist_empty(&tb->owners)) { + if (hlist_empty(&tb->owners)) { tb->fastreuse = reuse; if (sk->sk_reuseport) { tb->fastreuseport = FASTREUSEPORT_ANY;