Commit Graph

277027 Commits

Author SHA1 Message Date
KOSAKI Motohiro
e26a51148f mm/mempolicy.c: refix mbind_range() vma issue
commit 8aacc9f550 ("mm/mempolicy.c: fix pgoff in mbind vma merge") is a
slightly incorrect fix.

Why? Consider the following case.

1. map 4 pages of a file at offset 0

   [0123]

2. map 2 pages just after the first mapping of the same file but with
   page offset 2

   [0123][23]

3. mbind() 2 pages from the first mapping at offset 2.
   mbind_range() should treat the new vma as,

   [0123][23]
     |23|
     mbind vma

   but it does

   [0123][23]
     |01|
     mbind vma

   Oops. Then it performs a wrong vma merge and split ([01][0123] or similar).

This patch fixes it.

[testcase]
  test result - before the patch

	case4: 126: test failed. expect '2,4', actual '2,2,2'
       	case5: passed
	case6: passed
	case7: passed
	case8: passed
	case_n: 246: test failed. expect '4,2', actual '1,4'

	------------[ cut here ]------------
	kernel BUG at mm/filemap.c:135!
	invalid opcode: 0000 [#4] SMP DEBUG_PAGEALLOC

	(snip long bug on messages)

  test result - after the patch

	case4: passed
       	case5: passed
	case6: passed
	case7: passed
	case8: passed
	case_n: passed

  source:  mbind_vma_test.c
============================================================
 #include <numaif.h>
 #include <numa.h>
 #include <sys/mman.h>
 #include <stdio.h>
 #include <unistd.h>
 #include <stdlib.h>
 #include <string.h>

/* System page size in bytes; set by init() from getpagesize(). */
static unsigned long pagesize;
/* Start of the test window: one page past the base of the mapping made by init(). */
void* mmap_addr;
/* Node mask handed to mbind(); init() allocates it and sets bit 0. */
struct bitmask *nmask;
/* Shell command built from rubysrc by each test case. */
char buf[1024];
/* Stream returned by popen() for the command in buf. */
FILE *file;
/* Output of that command, compared against the expected string by Assert(). */
char retbuf[10240] = "";
/* Descriptor of the temporary file that init() creates with mkstemp(). */
int mapped_fd;

/*
 * printf-style template for a shell command that runs a ruby one-liner.
 * Arguments, in order: pid (%d), vstart (%llx), vend (%llx).
 *
 * The script runs `pmap -q` on pid and, for every output line whose first
 * field (parsed as hex) lies in [vstart, vend), collects the second field
 * divided by 4.  The collected values are printed joined by commas.
 *
 * NOTE(review): the division by 4 presumably converts pmap's size in KB
 * into 4KB pages -- confirm before running with a different page size.
 */
char *rubysrc = "ruby -e '\
  pid = %d; \
  vstart = 0x%llx; \
  vend = 0x%llx; \
  s = `pmap -q #{pid}`; \
  rary = []; \
  s.each_line {|line|; \
    ary=line.split(\" \"); \
    addr = ary[0].to_i(16); \
    if(vstart <= addr && addr < vend) then \
      rary.push(ary[1].to_i()/4); \
    end; \
  }; \
  print rary.join(\",\"); \
'";

/*
 * Build the fixture shared by all test cases.
 *
 * Allocates the node mask (node 0 only), creates an unlinked temporary
 * file extended to just past 8 pages, maps 8 pages of it MAP_SHARED with
 * PROT_NONE, and then makes the middle 6 pages readable/writable.
 * mmap_addr is left pointing at the first of those 6 pages, which are
 * touched with memset() so that they are populated.
 *
 * Any failing system call is reported with perror() and terminates the
 * program with status 1.
 */
void init(void)
{
	char tmpl[128];
	void *base;

	nmask = numa_allocate_nodemask();
	numa_bitmask_setbit(nmask, 0);

	pagesize = getpagesize();

	/* mkstemp() rewrites the template in place, so it needs a writable copy. */
	sprintf(tmpl, "%s", "mbind_vma_XXXXXX");
	mapped_fd = mkstemp(tmpl);
	if (mapped_fd == -1) {
		perror("mkstemp ");
		exit(1);
	}
	/* The name is not needed any more; the descriptor keeps the file alive. */
	unlink(tmpl);

	/* Writing one byte at offset 8 pages extends the file over the mapping. */
	if (lseek(mapped_fd, pagesize*8, SEEK_SET) < 0) {
		perror("lseek ");
		exit(1);
	}
	if (write(mapped_fd, "\0", 1) < 0) {
		perror("write ");
		exit(1);
	}

	base = mmap(NULL, pagesize*8, PROT_NONE,
		    MAP_SHARED, mapped_fd, 0);
	if (base == MAP_FAILED) {
		perror("mmap ");
		exit(1);
	}

	/* Only the inner 6 pages become accessible; the first and last stay PROT_NONE. */
	if (mprotect(base+pagesize, pagesize*6, PROT_READ|PROT_WRITE) < 0) {
		perror("mprotect ");
		exit(1);
	}

	mmap_addr = base + pagesize;

	/* make page populate */
	memset(mmap_addr, 0, pagesize*6);
}

void fin(void)
{
	void* addr = mmap_addr - pagesize;
	munmap(addr, pagesize*8);

	memset(buf, 0, sizeof(buf));
	memset(retbuf, 0, sizeof(retbuf));
}

/*
 * Apply MPOL_BIND with the global node mask to `len` pages starting
 * `index` pages into the test window.
 *
 * On failure the error is reported with perror() and the program exits,
 * using mbind()'s return value as the exit status.
 */
void mem_bind(int index, int len)
{
	void *start = mmap_addr + pagesize*index;
	unsigned long bytes = pagesize*len;
	int rc;

	rc = mbind(start, bytes, MPOL_BIND, nmask->maskp, nmask->size, 0);
	if (rc) {
		perror("mbind ");
		exit(rc);
	}
}

/*
 * Apply MPOL_INTERLEAVE with the global node mask to `len` pages starting
 * `index` pages into the test window.
 *
 * On failure the error is reported with perror() and the program exits,
 * using mbind()'s return value as the exit status.
 */
void mem_interleave(int index, int len)
{
	void *start = mmap_addr + pagesize*index;
	unsigned long bytes = pagesize*len;
	int rc;

	rc = mbind(start, bytes, MPOL_INTERLEAVE, nmask->maskp, nmask->size, 0);
	if (rc) {
		perror("mbind ");
		exit(rc);
	}
}

/*
 * Reset `len` pages starting `index` pages into the test window to
 * MPOL_DEFAULT (no node mask is passed).
 *
 * On failure the error is reported with perror() and the program exits,
 * using mbind()'s return value as the exit status.
 */
void mem_unbind(int index, int len)
{
	void *start = mmap_addr + pagesize*index;
	unsigned long bytes = pagesize*len;
	int rc;

	rc = mbind(start, bytes, MPOL_DEFAULT, NULL, 0, 0);
	if (rc) {
		perror("mbind ");
		exit(rc);
	}
}

/*
 * Compare `value` with `expected` and report the outcome on stderr.
 *
 * name: label of the test case, printed first.
 * line: source line of the check, printed only on a mismatch.
 *
 * A mismatch is deliberately not fatal, so the caller carries on and the
 * remaining test cases still run.
 */
void Assert(char *expected, char *value, char *name, int line)
{
	if (strcmp(expected, value) != 0) {
		fprintf(stderr, "%s: %d: test failed. expect '%s', actual '%s'\n",
			name, line, expected, value);
		return;
	}

	fprintf(stderr, "%s: passed\n", name);
}

/*
      AAAA
    PPPPPPNNNNNN
    might become
    PPNNNNNNNNNN
    case 4 below

    Bind pages 0-3 of the test window, then reset pages 2-3 to the
    default policy.  The mapping sizes reported for the window are
    expected to be "2,4".
*/
void case4(void)
{
	size_t got;

	init();
	/* %llx takes unsigned long long, so convert the pointers explicitly. */
	snprintf(buf, sizeof(buf), rubysrc, getpid(),
		 (unsigned long long)(unsigned long)mmap_addr,
		 (unsigned long long)(unsigned long)(mmap_addr+pagesize*6));

	mem_bind(0, 4);
	mem_unbind(2, 2);

	file = popen(buf, "r");
	if (file == NULL)
		perror("popen "), exit(1);
	/* Keep one byte free so the result is always NUL-terminated. */
	got = fread(retbuf, 1, sizeof(retbuf) - 1, file);
	retbuf[got] = '\0';
	/* Reap the child process and release the stream. */
	pclose(file);
	Assert("2,4", retbuf, "case4", __LINE__);

	fin();
}

/*
       AAAA
 PPPPPPNNNNNN
 might become
 PPPPPPPPPPNN
 case 5 below

 Bind pages 0-1 of the test window, then bind pages 2-3 with the same
 policy.  The mapping sizes reported for the window are expected to be
 "4,2".
*/
void case5(void)
{
	size_t got;

	init();
	/* %llx takes unsigned long long, so convert the pointers explicitly. */
	snprintf(buf, sizeof(buf), rubysrc, getpid(),
		 (unsigned long long)(unsigned long)mmap_addr,
		 (unsigned long long)(unsigned long)(mmap_addr+pagesize*6));

	mem_bind(0, 2);
	mem_bind(2, 2);

	file = popen(buf, "r");
	if (file == NULL)
		perror("popen "), exit(1);
	/* Keep one byte free so the result is always NUL-terminated. */
	got = fread(retbuf, 1, sizeof(retbuf) - 1, file);
	retbuf[got] = '\0';
	/* Reap the child process and release the stream. */
	pclose(file);
	Assert("4,2", retbuf, "case5", __LINE__);

	fin();
}

/*
	    AAAA
	PPPPNNNNXXXX
	might become
	PPPPPPPPPPPP 6

	Bind pages 0-1 and pages 4-5 of the test window, then bind the gap
	(pages 2-3) with the same policy.  The window is expected to be
	reported as a single mapping of size "6".
*/
void case6(void)
{
	size_t got;

	init();
	/* %llx takes unsigned long long, so convert the pointers explicitly. */
	snprintf(buf, sizeof(buf), rubysrc, getpid(),
		 (unsigned long long)(unsigned long)mmap_addr,
		 (unsigned long long)(unsigned long)(mmap_addr+pagesize*6));

	mem_bind(0, 2);
	mem_bind(4, 2);
	mem_bind(2, 2);

	file = popen(buf, "r");
	if (file == NULL)
		perror("popen "), exit(1);
	/* Keep one byte free so the result is always NUL-terminated. */
	got = fread(retbuf, 1, sizeof(retbuf) - 1, file);
	retbuf[got] = '\0';
	/* Reap the child process and release the stream. */
	pclose(file);
	Assert("6", retbuf, "case6", __LINE__);

	fin();
}

/*
    AAAA
PPPPNNNNXXXX
might become
PPPPPPPPXXXX 7

Bind pages 0-1 of the test window, interleave pages 4-5, then bind the
gap (pages 2-3).  The mapping sizes reported for the window are expected
to be "4,2".
*/
void case7(void)
{
	size_t got;

	init();
	/* %llx takes unsigned long long, so convert the pointers explicitly. */
	snprintf(buf, sizeof(buf), rubysrc, getpid(),
		 (unsigned long long)(unsigned long)mmap_addr,
		 (unsigned long long)(unsigned long)(mmap_addr+pagesize*6));

	mem_bind(0, 2);
	mem_interleave(4, 2);
	mem_bind(2, 2);

	file = popen(buf, "r");
	if (file == NULL)
		perror("popen "), exit(1);
	/* Keep one byte free so the result is always NUL-terminated. */
	got = fread(retbuf, 1, sizeof(retbuf) - 1, file);
	retbuf[got] = '\0';
	/* Reap the child process and release the stream. */
	pclose(file);
	Assert("4,2", retbuf, "case7", __LINE__);

	fin();
}

/*
    AAAA
PPPPNNNNXXXX
might become
PPPPNNNNNNNN 8

Bind pages 0-1 of the test window, interleave pages 4-5, then interleave
the gap (pages 2-3).  The mapping sizes reported for the window are
expected to be "2,4".
*/
void case8(void)
{
	size_t got;

	init();
	/* %llx takes unsigned long long, so convert the pointers explicitly. */
	snprintf(buf, sizeof(buf), rubysrc, getpid(),
		 (unsigned long long)(unsigned long)mmap_addr,
		 (unsigned long long)(unsigned long)(mmap_addr+pagesize*6));

	mem_bind(0, 2);
	mem_interleave(4, 2);
	mem_interleave(2, 2);

	file = popen(buf, "r");
	if (file == NULL)
		perror("popen "), exit(1);
	/* Keep one byte free so the result is always NUL-terminated. */
	got = fread(retbuf, 1, sizeof(retbuf) - 1, file);
	retbuf[got] = '\0';
	/* Reap the child process and release the stream. */
	pclose(file);
	Assert("2,4", retbuf, "case8", __LINE__);

	fin();
}

/*
 * Regression case for adjacent mappings of the same file whose page
 * offsets overlap.
 *
 * The last two pages of the test window are remapped (MAP_FIXED) from
 * file offset 3 pages, and then pages 2-3 of the window are reset to the
 * default policy.  The mapping sizes reported for the window are expected
 * to stay "4,2".
 */
void case_n(void)
{
	size_t got;
	void *remap;

	init();
	/* %llx takes unsigned long long, so convert the pointers explicitly. */
	snprintf(buf, sizeof(buf), rubysrc, getpid(),
		 (unsigned long long)(unsigned long)mmap_addr,
		 (unsigned long long)(unsigned long)(mmap_addr+pagesize*6));

	/* make redundant mappings [0][1234][34][7] */
	remap = mmap(mmap_addr + pagesize*4, pagesize*2, PROT_READ|PROT_WRITE,
		     MAP_FIXED|MAP_SHARED, mapped_fd, pagesize*3);
	if (remap == MAP_FAILED)
		perror("mmap "), exit(1);

	/* Expect to do nothing. */
	mem_unbind(2, 2);

	file = popen(buf, "r");
	if (file == NULL)
		perror("popen "), exit(1);
	/* Keep one byte free so the result is always NUL-terminated. */
	got = fread(retbuf, 1, sizeof(retbuf) - 1, file);
	retbuf[got] = '\0';
	/* Reap the child process and release the stream. */
	pclose(file);
	Assert("4,2", retbuf, "case_n", __LINE__);

	fin();
}

int main(int argc, char** argv)
{
	case4();
	case5();
	case6();
	case7();
	case8();
	case_n();

	return 0;
}
=============================================================

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Caspar Zhang <caspar@casparzhang.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: <stable@vger.kernel.org>		[3.1.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2011-12-29 16:31:57 -08:00
Hans de Goede
757e55c23d gspca: Fix bulk mode cameras no longer working (regression fix)
The new iso bandwidth calculation code accidentally has broken support
for bulk mode cameras. This has broken the following drivers:
finepix, jeilinj, ovfx2, ov534, ov534_9, se401, sq905, sq905c, sq930x,
stv0680, vicam.

This patch fixes this. Fix tested with: se401, sq905, sq905c, stv0680 & vicam
cams.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2011-12-29 16:29:00 -08:00
Keith Packard
371de6e4e0 drm/i915: Disable RC6 on Sandybridge by default
RC6 fails again.

> I found my system freeze mostly during starting up X and KDE. Sometimes it
> works for some minutes, sometimes it freezes immediatly. When the freeze
> happens, everything is dead (even the reset button does not work, I need to
> power cycle).

> I disabled RC6, and my system runs wonderfully.

> The system is a Z68 Pro board with Sandybridge i5-2500K processor, 8
> GB of RAM and UEFI firmware.

Reported-by: Kai Krakow <hurikhan77@gmail.com>
Signed-off-by: Keith Packard <keithp@keithp.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2011-12-26 21:07:27 -08:00
Keith Packard
ebbd857e6b drm/i915: Disable semaphores by default on SNB
Semaphores still cause problems on some machines:

> From Udo Steinberg:
>
> With Linux-3.2-rc6 I'm frequently seeing GPU hangs when large amounts of
> text scroll in an xterm, such as when extracting a tar archive. Such as this
> one (note the timestamps):
>
>  I can reproduce it fairly easily with something
>  as simple as:
>
>	  while true; do dmesg; done

This patch turns them off on SNB while leaving them on for IVB.

Reported-by: Udo Steinberg <udo@hypervisor.org>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Eugeni Dodonov <eugeni@dodonov.net>
Signed-off-by: Keith Packard <keithp@keithp.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2011-12-26 21:07:26 -08:00
Linus Torvalds
7f54492fbc Merge branch 'kvm-updates/3.2' of git://git.kernel.org/pub/scm/virt/kvm/kvm
* 'kvm-updates/3.2' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: PPC: e500: include linux/export.h
  KVM: PPC: fix kvmppc_start_thread() for CONFIG_SMP=N
  KVM: PPC: protect use of kvmppc_h_pr
  KVM: PPC: move compute_tlbie_rb to book3s_64 common header
  KVM: Don't automatically expose the TSC deadline timer in cpuid
  KVM: Device assignment permission checks
  KVM: Remove ability to assign a device without iommu support
  KVM: x86: Prevent starting PIT timers in the absence of irqchip support
2011-12-26 13:17:00 -08:00
Linus Torvalds
6fd8fb7f55 post 3.2-rc7 pull request
-----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2.0.17 (GNU/Linux)
 
 iQIcBAABAgAGBQJO9yDSAAoJEHnzb7JUXXnQgF8P/39avSfBFousCwaLSctLE+Jh
 2X+FNRsf9oz+tayRhgaJPtxJ1VUvbjOUlPi+cq1Abwq2IrshkwOWF0RlGeXCnSJq
 7KV0/xGE3c+fuE7cGMBw7MdQMNbLvgv3JCzhKLVgEbwCgy+cEFC3SzDr97cK8CtA
 D5hs29Hv1ZM9M3KyZXMDg9EMaebc3UuY9CKIzQfqAxrqUEVirsSLQPF942ze6e5/
 6c4QoVAvekzqx4wVcEHQLXnIg7/+hy3AL1KDDndzT3VWKt3z9ilTGNOH0s/3jr5I
 tHtEadDHKplU2itHA9Z3v41qEtRZKGbehcC/rM0Aotj6rNPs3In5hRtwHhreiAx7
 RsNGkeICEGwdPHMV1U8dBfRS/FUIBxebIrL2SQJRj+iJJiMPTLh/wa4Sj3e9lEE9
 b945Pi3gN6aW3UGaNyR/2Pj/dCyeyQ20rEzCeOYZqkalg5RaQhURi97HeL/EFYZ3
 uiMKeJ7/d3YGhXJ109751/JXUtwzmmJsqsl6NG5APm3bMqHFMEKGT9zfwUAPXn8N
 ExsSVDAD0PiebAh3ZM2WL6/jrSPx9+I3WHYPuGj8k6PChi/LKc3ms+pt/G8+voqC
 7ZB5yvVBqUY0rpzvKZ4Ymzkt0kSRCyLmEgDusbwaPTwJiv3c5N6jyyP7nzOUnzBl
 X6bV0ngSLUJSGZjRilJp
 =OGls
 -----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ieee1394/linux1394

post 3.2-rc7 pull request

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ieee1394/linux1394:
  MAINTAINERS: firewire git URL update
2011-12-26 12:46:17 -08:00
Linus Torvalds
6d4b9e38d3 vfs: fix handling of lock allocation failure in lease-break case
Bruce Fields notes that commit 778fc546f7 ("locks: fix tracking of
inprogress lease breaks") introduced a possible error pointer
dereference on failure to allocate memory.  locks_conflict() will
dereference the passed-in new lease lock structure that may be an error pointer.

This means an open (without O_NONBLOCK set) on a file with a lease
applied (generally only done when Samba or nfsd (with v4) is running)
could crash if a kmalloc() fails.

So instead of playing games with IS_ERR() all over the place, just
check the allocation failure early.  That makes the code more
straightforward, and avoids this possible bad pointer dereference.

Based-on-patch-by: J. Bruce Fields <bfields@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2011-12-26 10:25:26 -08:00
Scott Wood
fae9dbb4b4 KVM: PPC: e500: include linux/export.h
This is required for THIS_MODULE.  We recently stopped acquiring
it via some other header.

Signed-off-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-12-26 13:28:03 +02:00
Michael Neuling
251da03897 KVM: PPC: fix kvmppc_start_thread() for CONFIG_SMP=N
Currently kvmppc_start_thread() tries to wake other SMT threads via
xics_wake_cpu().  Unfortunately xics_wake_cpu only exists when
CONFIG_SMP=Y so when compiling with CONFIG_SMP=N we get:

  arch/powerpc/kvm/built-in.o: In function `.kvmppc_start_thread':
  book3s_hv.c:(.text+0xa1e0): undefined reference to `.xics_wake_cpu'

The following should be fine since kvmppc_start_thread() shouldn't be
called to start non-zero threads when SMP=N since threads_per_core=1.

Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-12-26 13:28:02 +02:00
Andreas Schwab
96f38d7286 KVM: PPC: protect use of kvmppc_h_pr
kvmppc_h_pr is only available if CONFIG_KVM_BOOK3S_64_PR.

Signed-off-by: Andreas Schwab <schwab@linux-m68k.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-12-26 13:28:01 +02:00
Andreas Schwab
36cc66d638 KVM: PPC: move compute_tlbie_rb to book3s_64 common header
compute_tlbie_rb is only used on ppc64 and cannot be compiled on ppc32.

Signed-off-by: Andreas Schwab <schwab@linux-m68k.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2011-12-26 13:28:00 +02:00
Jan Kiszka
4d25a066b6 KVM: Don't automatically expose the TSC deadline timer in cpuid
Unlike all of the other cpuid bits, the TSC deadline timer bit is set
unconditionally, regardless of what userspace wants.

This is broken in several ways:
 - if userspace doesn't use KVM_CREATE_IRQCHIP, and doesn't emulate the TSC
   deadline timer feature, a guest that uses the feature will break
 - live migration to older host kernels that don't support the TSC deadline
   timer will cause the feature to be pulled from under the guest's feet;
   breaking it
 - guests that are broken wrt the feature will fail.

Fix by not enabling the feature automatically; instead report it to userspace.
Because the feature depends on KVM_CREATE_IRQCHIP, which we cannot guarantee
will be called, we expose it via a KVM_CAP_TSC_DEADLINE_TIMER and not
KVM_GET_SUPPORTED_CPUID.

Fixes the Illumos guest kernel, which uses the TSC deadline timer feature.

[avi: add the KVM_CAP + documentation]

Reported-by: Alexey Zaytsev <alexey.zaytsev@gmail.com>
Tested-by: Alexey Zaytsev <alexey.zaytsev@gmail.com>
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
2011-12-26 13:27:44 +02:00
Alex Williamson
3d27e23b17 KVM: Device assignment permission checks
Only allow KVM device assignment to attach to devices which:

 - Are not bridges
 - Have BAR resources (assume others are special devices)
 - The user has permissions to use

Assigning a bridge is a configuration error, it's not supported, and
typically doesn't result in the behavior the user is expecting anyway.
Devices without BAR resources are typically chipset components that
also don't have host drivers.  We don't want users to hold such devices
captive or cause system problems by fencing them off into an iommu
domain.  We determine "permission to use" by testing whether the user
has access to the PCI sysfs resource files.  By default a normal user
will not have access to these files, so it provides a good indication
that an administration agent has granted the user access to the device.

[Yang Bai: add missing #include]
[avi: fix comment style]

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Yang Bai <hamo.by@gmail.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
2011-12-25 19:03:54 +02:00
Alex Williamson
423873736b KVM: Remove ability to assign a device without iommu support
This option has no users and it exposes a security hole in that we
can allow devices to be assigned without iommu protection.  Make
KVM_DEV_ASSIGN_ENABLE_IOMMU a mandatory option.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
2011-12-25 17:13:31 +02:00
Jan Kiszka
0924ab2cfa KVM: x86: Prevent starting PIT timers in the absence of irqchip support
User space may create the PIT and forget about setting up the irqchips.
In that case, firing PIT IRQs will crash the host:

BUG: unable to handle kernel NULL pointer dereference at 0000000000000128
IP: [<ffffffffa10f6280>] kvm_set_irq+0x30/0x170 [kvm]
...
Call Trace:
 [<ffffffffa11228c1>] pit_do_work+0x51/0xd0 [kvm]
 [<ffffffff81071431>] process_one_work+0x111/0x4d0
 [<ffffffff81071bb2>] worker_thread+0x152/0x340
 [<ffffffff81075c8e>] kthread+0x7e/0x90
 [<ffffffff815a4474>] kernel_thread_helper+0x4/0x10

Prevent this by checking the irqchip mode before starting a timer. We
can't deny creating the PIT if the irqchips aren't set up yet as
current user land expects this order to work.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
2011-12-25 17:13:18 +02:00
Stefan Richter
2ca526bf49 MAINTAINERS: firewire git URL update
Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
2011-12-25 14:05:05 +01:00
Linus Torvalds
4962516b23 Merge branch 'drm-fixes' of git://people.freedesktop.org/~airlied/linux
* 'drm-fixes' of git://people.freedesktop.org/~airlied/linux:
  vmwgfx: fix incorrect VRAM size check in vmw_kms_fb_create()
  drm/radeon/kms: bail on BTC parts if MC ucode is missing
2011-12-24 13:34:44 -08:00
Linus Torvalds
5f0a6e2d50 Linux 3.2-rc7 2011-12-23 21:51:06 -08:00
Linus Torvalds
a22681fabb Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  VFS: Fix race between CPU hotplug and lglocks
2011-12-23 21:47:28 -08:00
Linus Torvalds
6d451c578c for linus: writeback reason binary tracing format fix
-----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1.4.11 (GNU/Linux)
 
 iQIcBAABAgAGBQJO9EbVAAoJECvKgwp+S8JaUG0P/RDICTvG5b6/YD1wwh4cHBTF
 xu4av5o+Okablr282vLt1d9N4nLP6A4Jp2XOxNoLdyUVMtwRNCMjO62vcBetKmqU
 9GJTKh3H72/amqNrfvf9E0Fl3rOv2U71x7k4KTwKVdUvITXEL/U0Vsl8a9WVNUZ0
 mZERzf0vrOCSN6gEzh4iNzMuZpKRSnNNP4iUilkwcD9cXPk85hFCNZx/nyMhKtcF
 9XzhSJgg1wJAwmBc9bdhkEm7jKYvxmslb4nMdQHoQNDGpEjwRbS7jQ/iHuD2AhPH
 DFTQ8LOhxxaTOiDjHJav0z/FRw+q6ZYbrkbLVt2qTOxfMxvHJdlfu7vTglq4PK9n
 Bo02K9zZisCM76uCUTHcp1aMjzU9tsx9tYipBz8YXNPoEuhYn/1F3tbt7FkCGBck
 wwTCe/J0+IKHWiXSAkZMj5PiSeMwliMpF7INdkLExkinwNu719dS6pTZDs/o8CMD
 M/0/M8jYnWOmylYDAbhKyEzAAHbAm0YGuUG7IVGP0H5YJucfmRGJzQMNaBTUUsP7
 pXdFA02rUTodCrSHqXscmA0Lb9ypsFnmAYMbb+YF5UNOW9zcQ9b2J23wmna7prIv
 FNKVAgDEjWk/SpN0mG3zZk7ixUagkbo9DfalZCBZsveZPktq1KZor1KaOIFzkUuB
 DUdtr4+GjhfDqFWywZ9+
 =dOhj
 -----END PGP SIGNATURE-----

Merge tag 'writeback' of git://git.kernel.org/pub/scm/linux/kernel/git/wfg/linux

for linus: writeback reason binary tracing format fix

* tag 'writeback' of git://git.kernel.org/pub/scm/linux/kernel/git/wfg/linux:
  writeback: show writeback reason with __print_symbolic
2011-12-23 20:25:36 -08:00
Linus Torvalds
71448c1f4f Merge branch 'rc-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/mmarek/kbuild
* 'rc-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/mmarek/kbuild:
  kconfig: adapt update-po-config to new UML layout
2011-12-23 15:01:24 -08:00
Linus Torvalds
4d18de9449 Merge branch 'v4l_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media
* 'v4l_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-media:
  [media] omap3isp: Fix crash caused by subdevs now having a pointer to devnodes
2011-12-23 14:59:08 -08:00
Linus Torvalds
827fa4c762 Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
  Btrfs: call d_instantiate after all ops are setup
  Btrfs: fix worker lock misuse in find_worker
2011-12-23 14:58:39 -08:00
Linus Torvalds
5d219c6b9f Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc:
  sparc64: Fix MSIQ HV call ordering in pci_sun4v_msiq_build_irq().
2011-12-23 14:58:14 -08:00
Linus Torvalds
155d4551bd Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net:
  netfilter: xt_connbytes: handle negation correctly
  net: relax rcvbuf limits
  rps: fix insufficient bounds checking in store_rps_dev_flow_table_cnt()
  net: introduce DST_NOPEER dst flag
  mqprio: Avoid panic if no options are provided
  bridge: provide a mtu() method for fake_dst_ops
2011-12-23 14:57:55 -08:00
David S. Miller
6350323ad8 Merge branch 'nf' of git://1984.lsi.us.es/net 2011-12-23 14:29:20 -05:00
Florian Westphal
0354b48f63 netfilter: xt_connbytes: handle negation correctly
"! --connbytes 23:42" should match if the packet/byte count is not in range.

As there is no explicit "invert match" toggle in the match structure,
userspace swaps the from and to arguments
(i.e., as if "--connbytes 42:23" were given).

However, "what <= 23 && what >= 42" will always be false.

Change things so we use "||" in case "from" is larger than "to".

This change may look like it breaks backwards compatibility when "to" is 0.
However, older iptables binaries will refuse "connbytes 42:0",
and current releases treat it to mean "! --connbytes 0:42",
so we should be fine.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
2011-12-23 14:50:19 +01:00
Al Viro
08c422c27f Btrfs: call d_instantiate after all ops are setup
This closes races where btrfs is calling d_instantiate too soon during
inode creation.  All of the callers of btrfs_add_nondir are updated to
instantiate after the inode is fully setup in memory.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
2011-12-23 08:02:26 -05:00
Chris Mason
8d532b2afb Btrfs: fix worker lock misuse in find_worker
Dan Carpenter noticed that we were doing a double unlock on the worker
lock, and sometimes picking a worker thread without the lock held.

This fixes both errors.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
2011-12-23 07:53:00 -05:00
Eric Dumazet
0fd7bac6b6 net: relax rcvbuf limits
skb->truesize might be big even for a small packet.

It's even bigger after commit 87fb4b7b53 (net: more accurate skb
truesize) and big MTU.

We should allow queueing at least one packet per receiver, even with a
low RCVBUF setting.

Reported-by: Michal Simek <monstr@monstr.eu>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2011-12-23 02:15:14 -05:00
Xi Wang
a0a129f8b6 rps: fix insufficient bounds checking in store_rps_dev_flow_table_cnt()
Setting a large rps_flow_cnt like (1 << 30) on 32-bit platform will
cause a kernel oops due to insufficient bounds checking.

	if (count > 1<<30) {
		/* Enforce a limit to prevent overflow */
		return -EINVAL;
	}
	count = roundup_pow_of_two(count);
	table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(count));

Note that the macro RPS_DEV_FLOW_TABLE_SIZE(count) is defined as:

	... + (count * sizeof(struct rps_dev_flow))

where sizeof(struct rps_dev_flow) is 8.  (1 << 30) * 8 will overflow
32 bits.

This patch replaces the magic number (1 << 30) with a symbolic bound.

Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Xi Wang <xi.wang@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2011-12-22 22:34:56 -05:00
Eric Dumazet
e688a60480 net: introduce DST_NOPEER dst flag
Chris Boot reported crashes occurring in ipv6_select_ident().

[  461.457562] RIP: 0010:[<ffffffff812dde61>]  [<ffffffff812dde61>]
ipv6_select_ident+0x31/0xa7

[  461.578229] Call Trace:
[  461.580742] <IRQ>
[  461.582870]  [<ffffffff812efa7f>] ? udp6_ufo_fragment+0x124/0x1a2
[  461.589054]  [<ffffffff812dbfe0>] ? ipv6_gso_segment+0xc0/0x155
[  461.595140]  [<ffffffff812700c6>] ? skb_gso_segment+0x208/0x28b
[  461.601198]  [<ffffffffa03f236b>] ? ipv6_confirm+0x146/0x15e
[nf_conntrack_ipv6]
[  461.608786]  [<ffffffff81291c4d>] ? nf_iterate+0x41/0x77
[  461.614227]  [<ffffffff81271d64>] ? dev_hard_start_xmit+0x357/0x543
[  461.620659]  [<ffffffff81291cf6>] ? nf_hook_slow+0x73/0x111
[  461.626440]  [<ffffffffa0379745>] ? br_parse_ip_options+0x19a/0x19a
[bridge]
[  461.633581]  [<ffffffff812722ff>] ? dev_queue_xmit+0x3af/0x459
[  461.639577]  [<ffffffffa03747d2>] ? br_dev_queue_push_xmit+0x72/0x76
[bridge]
[  461.646887]  [<ffffffffa03791e3>] ? br_nf_post_routing+0x17d/0x18f
[bridge]
[  461.653997]  [<ffffffff81291c4d>] ? nf_iterate+0x41/0x77
[  461.659473]  [<ffffffffa0374760>] ? br_flood+0xfa/0xfa [bridge]
[  461.665485]  [<ffffffff81291cf6>] ? nf_hook_slow+0x73/0x111
[  461.671234]  [<ffffffffa0374760>] ? br_flood+0xfa/0xfa [bridge]
[  461.677299]  [<ffffffffa0379215>] ?
nf_bridge_update_protocol+0x20/0x20 [bridge]
[  461.684891]  [<ffffffffa03bb0e5>] ? nf_ct_zone+0xa/0x17 [nf_conntrack]
[  461.691520]  [<ffffffffa0374760>] ? br_flood+0xfa/0xfa [bridge]
[  461.697572]  [<ffffffffa0374812>] ? NF_HOOK.constprop.8+0x3c/0x56
[bridge]
[  461.704616]  [<ffffffffa0379031>] ?
nf_bridge_push_encap_header+0x1c/0x26 [bridge]
[  461.712329]  [<ffffffffa037929f>] ? br_nf_forward_finish+0x8a/0x95
[bridge]
[  461.719490]  [<ffffffffa037900a>] ?
nf_bridge_pull_encap_header+0x1c/0x27 [bridge]
[  461.727223]  [<ffffffffa0379974>] ? br_nf_forward_ip+0x1c0/0x1d4 [bridge]
[  461.734292]  [<ffffffff81291c4d>] ? nf_iterate+0x41/0x77
[  461.739758]  [<ffffffffa03748cc>] ? __br_deliver+0xa0/0xa0 [bridge]
[  461.746203]  [<ffffffff81291cf6>] ? nf_hook_slow+0x73/0x111
[  461.751950]  [<ffffffffa03748cc>] ? __br_deliver+0xa0/0xa0 [bridge]
[  461.758378]  [<ffffffffa037533a>] ? NF_HOOK.constprop.4+0x56/0x56
[bridge]

This is caused by bridge netfilter special dst_entry (fake_rtable), a
special shared entry, where attaching an inetpeer makes no sense.

Problem is present since commit 87c48fa3b4 (ipv6: make fragment
identifications less predictable)

Introduce DST_NOPEER dst flag and make sure ipv6_select_ident() and
__ip_select_ident() fallback to the 'no peer attached' handling.

Reported-by: Chris Boot <bootc@bootc.net>
Tested-by: Chris Boot <bootc@bootc.net>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2011-12-22 22:34:56 -05:00
Thomas Graf
7838f2ce36 mqprio: Avoid panic if no options are provided
Userspace may not provide TCA_OPTIONS, in fact tc currently does
not do so if no arguments are specified on the command line.
Return EINVAL instead of panicking.

Signed-off-by: Thomas Graf <tgraf@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2011-12-22 22:34:56 -05:00
Eric Dumazet
a13861a28b bridge: provide a mtu() method for fake_dst_ops
Commit 618f9bc74a (net: Move mtu handling down to the protocol
depended handlers) forgot the bridge netfilter case, adding a NULL
dereference in ip_fragment().

Reported-by: Chris Boot <bootc@bootc.net>
CC: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2011-12-22 22:34:56 -05:00
Linus Torvalds
ad1fca2003 Merge branch 'for-linus' of git://neil.brown.name/md
* 'for-linus' of git://neil.brown.name/md:
  md/bitmap: It is OK to clear bits during recovery.
  md: don't give up looking for spares on first failure-to-add
  md/raid5: ensure correct assessment of drives during degraded reshape.
  md/linear: fix hot-add of devices to linear arrays.
2011-12-22 15:36:17 -08:00
NeilBrown
961902c0f8 md/bitmap: It is OK to clear bits during recovery.
commit d0a4bb4927 introduced a
regression which is annoying but fairly harmless.

When writing to an array that is undergoing recovery (a spare
is being integrated into the array), writing to the array will
set bits in the bitmap, but they will not be cleared when the
write completes.

For bits covering areas that have not been recovered yet this is not a
problem as the recovery will clear the bits.  However bits set in
already-recovered region will stay set and never be cleared.
This doesn't risk data integrity.  The only negatives are:
 - next time there is a crash, more resyncing than necessary will
   be done.
 - the bitmap doesn't look clean, which is confusing.

While an array is recovering we don't want to update the
'events_cleared' setting in the bitmap but we do still want to clear
bits that have very recently been set - providing they were written to
the recovering device.

So split those two needs - which previously both depended on 'success' -
and always clear the bit if the write went to all devices.

Signed-off-by: NeilBrown <neilb@suse.de>
2011-12-23 09:57:48 +11:00
NeilBrown
60fc13702a md: don't give up looking for spares on first failure-to-add
Before performing a recovery we try to remove any spares that
might not be working, then add any that might have become relevant.

Currently we abort on the first spare that cannot be added.
This is a false optimisation.
It is conceivable that - depending on rules in the personality - a
subsequent spare might be accepted.
Also the loop does other things like count the available spares and
reset the 'recovery_offset' value.

If we abort early these might not happen properly.

So remove the early abort.

In particular if you have an array that is undergoing recovery and
which has extra spares, then the recovery may not restart after a
reboot as the count of 'spares' might end up as zero.

Reported-by: Anssi Hannula <anssi.hannula@iki.fi>
Signed-off-by: NeilBrown <neilb@suse.de>
2011-12-23 09:57:19 +11:00
NeilBrown
30d7a48368 md/raid5: ensure correct assessment of drives during degraded reshape.
While reshaping a degraded array (as when reshaping a RAID0 by first
converting it to a degraded RAID4) we currently get confused about
which devices are in_sync.  In most cases we get it right, but in the
region that is being reshaped we need to treat non-failed devices as
in-sync when we have the data but haven't actually written it out yet.

Reported-by: Adam Kwolek <adam.kwolek@intel.com>
Signed-off-by: NeilBrown <neilb@suse.de>
2011-12-23 09:57:00 +11:00
NeilBrown
09cd9270ea md/linear: fix hot-add of devices to linear arrays.
commit d70ed2e4fa
broke hot-add to a linear array.
After that commit, metadata is not written to devices until they
have been fully integrated into the array as determined by
saved_raid_disk.  That patch arranged to clear that field after
a recovery completed.

However for linear arrays, there is no recovery - the integration is
instantaneous.  So we need to explicitly clear the saved_raid_disk
field.

Signed-off-by: NeilBrown <neilb@suse.de>
2011-12-23 09:56:55 +11:00
David S. Miller
7cc8583372 sparc64: Fix MSIQ HV call ordering in pci_sun4v_msiq_build_irq().
This silently was working for many years and stopped working on
Niagara-T3 machines.

We need to set the MSIQ to VALID before we can set its state to IDLE.

On Niagara-T3, setting the state to IDLE first was causing HV_EINVAL
errors.  The hypervisor documentation says, rather ambiguously, that
the MSIQ must be "initialized" before one can set the state.

I previously understood this to mean merely that a successful setconf()
operation has been performed on the MSIQ, which we have done at this
point.  But it seems to also mean that it has been set VALID too.

Signed-off-by: David S. Miller <davem@davemloft.net>
2011-12-22 13:46:53 -08:00
Linus Torvalds
b3b1b70e62 Merge branch 'usb-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb
* 'usb-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb:
  USB: Fix usb/isp1760 build on sparc
  usb: gadget: epautoconf: do not change number of streams
  usb: dwc3: core: fix cached revision on our structure
  usb: musb: fix reset issue with full speed device
2011-12-22 12:59:47 -08:00
Linus Torvalds
abe8809c14 Merge branch 'upstream-linus' of git://github.com/jgarzik/libata-dev
* 'upstream-linus' of git://github.com/jgarzik/libata-dev:
  pata_of_platform: Add missing CONFIG_OF_IRQ dependency.
2011-12-22 12:53:32 -08:00
David Miller
19d40dcabf pata_of_platform: Add missing CONFIG_OF_IRQ dependency.
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
2011-12-22 15:00:32 -05:00
Stephen Rothwell
b9eda06f80 ipv4: using prefetch requires including prefetch.h
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: David Miller <davem@davemloft.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2011-12-22 09:16:09 -08:00
Xi Wang
8a78389651 vmwgfx: fix incorrect VRAM size check in vmw_kms_fb_create()
Commit e133e737 didn't correctly fix the integer overflow issue.

-	unsigned int required_size;
+	u64 required_size;
	...
	required_size = mode_cmd->pitch * mode_cmd->height;
-	if (unlikely(required_size > dev_priv->vram_size)) {
+	if (unlikely(required_size > (u64) dev_priv->vram_size)) {

Note that both pitch and height are u32.  Their product is still u32 and
would overflow before being assigned to required_size.  A correct way is
to convert pitch and height to u64 before the multiplication.

	required_size = (u64)mode_cmd->pitch * (u64)mode_cmd->height;

This patch calls the existing vmw_kms_validate_mode_vram() for
validation.

Signed-off-by: Xi Wang <xi.wang@gmail.com>
Reviewed-and-tested-by: Thomas Hellstrom <thellstrom@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2011-12-22 12:31:02 +00:00
Alex Deucher
77e00f2ea9 drm/radeon/kms: bail on BTC parts if MC ucode is missing
We already do this for cayman, need to also do it for
BTC parts.  The default memory and voltage setup is not
adequate for advanced operation.  Continuing will
result in an unusable display.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@kernel.org
Cc: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Dave Airlie <airlied@redhat.com>
2011-12-22 12:28:01 +00:00
Srivatsa S. Bhat
e30e2fdfe5 VFS: Fix race between CPU hotplug and lglocks
Currently, the *_global_[un]lock_online() routines are not at all synchronized
with CPU hotplug. Soft-lockups detected as a consequence of this race were
reported earlier at https://lkml.org/lkml/2011/8/24/185. (Thanks to Cong Meng
for finding out that the root-cause of this issue is the race condition
between br_write_[un]lock() and CPU hotplug, which results in the lock states
getting messed up).

Fixing this race by just adding {get,put}_online_cpus() at appropriate places
in *_global_[un]lock_online() is not a good option, because, then suddenly
br_write_[un]lock() would become blocking, whereas they have been kept as
non-blocking all this time, and we would want to keep them that way.

So, overall, we want to ensure 3 things:
1. br_write_lock() and br_write_unlock() must remain as non-blocking.
2. The corresponding lock and unlock of the per-cpu spinlocks must not happen
   for different sets of CPUs.
3. Either prevent any new CPU online operation in between this lock-unlock, or
   ensure that the newly onlined CPU does not proceed with its corresponding
   per-cpu spinlock unlocked.

To achieve all this:
(a) We introduce a new spinlock that is taken by the *_global_lock_online()
    routine and released by the *_global_unlock_online() routine.
(b) We register a callback for CPU hotplug notifications, and this callback
    takes the same spinlock as above.
(c) We maintain a bitmap which is close to the cpu_online_mask, and once it is
    initialized in the lock_init() code, all future updates to it are done in
    the callback, under the above spinlock.
(d) The above bitmap is used (instead of cpu_online_mask) while locking and
    unlocking the per-cpu locks.

The callback takes the spinlock upon the CPU_UP_PREPARE event. So, if the
br_write_lock-unlock sequence is in progress, the callback keeps spinning,
thus preventing the CPU online operation till the lock-unlock sequence is
complete. This takes care of requirement (3).

The bitmap that we maintain remains unmodified throughout the lock-unlock
sequence, since all updates to it are managed by the callback, which takes
the same spinlock as the one taken by the lock code and released only by the
unlock routine. Combining this with (d) above, satisfies requirement (2).

Overall, since we use a spinlock (mentioned in (a)) to prevent CPU hotplug
operations from racing with br_write_lock-unlock, requirement (1) is also
taken care of.

By the way, it is to be noted that a CPU offline operation can actually run
in parallel with our lock-unlock sequence, because our callback doesn't react
to notifications earlier than CPU_DEAD (in order to maintain our bitmap
properly). And this means, since we use our own bitmap (which is stale, on
purpose) during the lock-unlock sequence, we could end up unlocking the
per-cpu lock of an offline CPU (because we had locked it earlier, when the
CPU was online), in order to satisfy requirement (2). But this is harmless,
though it looks a bit awkward.

Debugged-by: Cong Meng <mc@linux.vnet.ibm.com>
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: stable@vger.kernel.org
2011-12-22 02:02:20 -05:00
Linus Torvalds
ecefc36b41 Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net:
  net: Add a flow_cache_flush_deferred function
  ipv4: reintroduce route cache garbage collector
  net: have ipconfig not wait if no dev is available
  sctp: Do not account for sizeof(struct sk_buff) in estimated rwnd
  asix: new device id
  davinci-cpdma: fix locking issue in cpdma_chan_stop
  sctp: fix incorrect overflow check on autoclose
  r8169: fix Config2 MSIEnable bit setting.
  llc: llc_cmsg_rcv was getting called after sk_eat_skb.
  net: bpf_jit: fix an off-one bug in x86_64 cond jump target
  iwlwifi: update SCD BC table for all SCD queues
  Revert "Bluetooth: Revert: Fix L2CAP connection establishment"
  Bluetooth: Clear RFCOMM session timer when disconnecting last channel
  Bluetooth: Prevent uninitialized data access in L2CAP configuration
  iwlwifi: allow to switch to HT40 if not associated
  iwlwifi: tx_sync only on PAN context
  mwifiex: avoid double list_del in command cancel path
  ath9k: fix max phy rate at rate control init
  nfc: signedness bug in __nci_request()
  iwlwifi: do not set the sequence control bit is not needed
2011-12-21 18:29:26 -08:00
Linus Torvalds
d5ed5e48f4 Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound:
  ALSA: atmel/ac97c: using software reset instead hardware reset if not available
2011-12-21 18:29:05 -08:00
Linus Torvalds
0703c680f6 Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sameo/mfd-2.6
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sameo/mfd-2.6:
  mfd: Include linux/io.h to jz4740-adc
  mfd: Use request_threaded_irq for twl4030-irq instead of irq_set_chained_handler
  mfd: Base interrupt for twl4030-irq must be one-shot
  mfd: Handle tps65910 clear-mask correctly
  mfd: add #ifdef CONFIG_DEBUG_FS guard for ab8500_debug_resources
  mfd: Fix twl-core oops while calling twl_i2c_* for unbound driver
  mfd: include linux/module.h for ab5500-debugfs
  mfd: Update wm8994 active device checks for WM1811
  mfd: Set tps6586x bits if new value is different from the old one
  mfd: Set da903x bits if new value is different from the old one
  mfd: Set adp5520 bits if new value is different from the old one
  mfd: Add missed free_irq in da903x_remove
2011-12-21 18:28:52 -08:00