linux/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.c

213 lines
4.9 KiB
C
Raw Normal View History

/*
* Copyright 2009 Red Hat Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: Ben Skeggs
*/
#include "aux.h"
#include "pad.h"
static int
nvkm_i2c_aux_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
{
struct nvkm_i2c_aux *aux = container_of(adap, typeof(*aux), i2c);
struct i2c_msg *msg = msgs;
int ret, mcnt = num;
ret = nvkm_i2c_aux_acquire(aux);
if (ret)
return ret;
while (mcnt--) {
u8 remaining = msg->len;
u8 *ptr = msg->buf;
while (remaining) {
drm/nouveau: Don't retry infinitely when receiving no data on i2c over AUX While I had thought I had fixed this issue in: commit 342406e4fbba ("drm/nouveau/i2c: Disable i2c bus access after ->fini()") It turns out that while I did fix the error messages I was seeing on my P50 when trying to access i2c busses with the GPU in runtime suspend, I accidentally had missed one important detail that was mentioned on the bug report this commit was supposed to fix: that the CPU would only lock up when trying to access i2c busses _on connected devices_ _while the GPU is not in runtime suspend_. Whoops. That definitely explains why I was not able to get my machine to hang with i2c bus interactions until now, as plugging my P50 into it's dock with an HDMI monitor connected allowed me to finally reproduce this locally. Now that I have managed to reproduce this issue properly, it looks like the problem is much simpler then it looks. It turns out that some connected devices, such as MST laptop docks, will actually ACK i2c reads even if no data was actually read: [ 275.063043] nouveau 0000:01:00.0: i2c: aux 000a: 1: 0000004c 1 [ 275.063447] nouveau 0000:01:00.0: i2c: aux 000a: 00 01101000 10040000 [ 275.063759] nouveau 0000:01:00.0: i2c: aux 000a: rd 00000001 [ 275.064024] nouveau 0000:01:00.0: i2c: aux 000a: rd 00000000 [ 275.064285] nouveau 0000:01:00.0: i2c: aux 000a: rd 00000000 [ 275.064594] nouveau 0000:01:00.0: i2c: aux 000a: rd 00000000 Because we don't handle the situation of i2c ack without any data, we end up entering an infinite loop in nvkm_i2c_aux_i2c_xfer() since the value of cnt always remains at 0. This finally properly explains how this could result in a CPU hang like the ones observed in the aforementioned commit. So, fix this by retrying transactions if no data is written or received, and give up and fail the transaction if we continue to not write or receive any data after 32 retries. 
Signed-off-by: Lyude Paul <lyude@redhat.com> Cc: stable@vger.kernel.org Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2019-07-25 19:40:01 +00:00
u8 cnt, retries, cmd;
if (msg->flags & I2C_M_RD)
cmd = 1;
else
cmd = 0;
if (mcnt || remaining > 16)
cmd |= 4; /* MOT */
drm/nouveau: Don't retry infinitely when receiving no data on i2c over AUX While I had thought I had fixed this issue in: commit 342406e4fbba ("drm/nouveau/i2c: Disable i2c bus access after ->fini()") It turns out that while I did fix the error messages I was seeing on my P50 when trying to access i2c busses with the GPU in runtime suspend, I accidentally had missed one important detail that was mentioned on the bug report this commit was supposed to fix: that the CPU would only lock up when trying to access i2c busses _on connected devices_ _while the GPU is not in runtime suspend_. Whoops. That definitely explains why I was not able to get my machine to hang with i2c bus interactions until now, as plugging my P50 into it's dock with an HDMI monitor connected allowed me to finally reproduce this locally. Now that I have managed to reproduce this issue properly, it looks like the problem is much simpler then it looks. It turns out that some connected devices, such as MST laptop docks, will actually ACK i2c reads even if no data was actually read: [ 275.063043] nouveau 0000:01:00.0: i2c: aux 000a: 1: 0000004c 1 [ 275.063447] nouveau 0000:01:00.0: i2c: aux 000a: 00 01101000 10040000 [ 275.063759] nouveau 0000:01:00.0: i2c: aux 000a: rd 00000001 [ 275.064024] nouveau 0000:01:00.0: i2c: aux 000a: rd 00000000 [ 275.064285] nouveau 0000:01:00.0: i2c: aux 000a: rd 00000000 [ 275.064594] nouveau 0000:01:00.0: i2c: aux 000a: rd 00000000 Because we don't handle the situation of i2c ack without any data, we end up entering an infinite loop in nvkm_i2c_aux_i2c_xfer() since the value of cnt always remains at 0. This finally properly explains how this could result in a CPU hang like the ones observed in the aforementioned commit. So, fix this by retrying transactions if no data is written or received, and give up and fail the transaction if we continue to not write or receive any data after 32 retries. 
Signed-off-by: Lyude Paul <lyude@redhat.com> Cc: stable@vger.kernel.org Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2019-07-25 19:40:01 +00:00
for (retries = 0, cnt = 0;
retries < 32 && !cnt;
retries++) {
cnt = min_t(u8, remaining, 16);
ret = aux->func->xfer(aux, true, cmd,
msg->addr, ptr, &cnt);
if (ret < 0)
goto out;
}
if (!cnt) {
AUX_TRACE(aux, "no data after 32 retries");
ret = -EIO;
goto out;
}
ptr += cnt;
remaining -= cnt;
}
msg++;
}
drm/nouveau: Don't retry infinitely when receiving no data on i2c over AUX While I had thought I had fixed this issue in: commit 342406e4fbba ("drm/nouveau/i2c: Disable i2c bus access after ->fini()") It turns out that while I did fix the error messages I was seeing on my P50 when trying to access i2c busses with the GPU in runtime suspend, I accidentally had missed one important detail that was mentioned on the bug report this commit was supposed to fix: that the CPU would only lock up when trying to access i2c busses _on connected devices_ _while the GPU is not in runtime suspend_. Whoops. That definitely explains why I was not able to get my machine to hang with i2c bus interactions until now, as plugging my P50 into it's dock with an HDMI monitor connected allowed me to finally reproduce this locally. Now that I have managed to reproduce this issue properly, it looks like the problem is much simpler then it looks. It turns out that some connected devices, such as MST laptop docks, will actually ACK i2c reads even if no data was actually read: [ 275.063043] nouveau 0000:01:00.0: i2c: aux 000a: 1: 0000004c 1 [ 275.063447] nouveau 0000:01:00.0: i2c: aux 000a: 00 01101000 10040000 [ 275.063759] nouveau 0000:01:00.0: i2c: aux 000a: rd 00000001 [ 275.064024] nouveau 0000:01:00.0: i2c: aux 000a: rd 00000000 [ 275.064285] nouveau 0000:01:00.0: i2c: aux 000a: rd 00000000 [ 275.064594] nouveau 0000:01:00.0: i2c: aux 000a: rd 00000000 Because we don't handle the situation of i2c ack without any data, we end up entering an infinite loop in nvkm_i2c_aux_i2c_xfer() since the value of cnt always remains at 0. This finally properly explains how this could result in a CPU hang like the ones observed in the aforementioned commit. So, fix this by retrying transactions if no data is written or received, and give up and fail the transaction if we continue to not write or receive any data after 32 retries. 
Signed-off-by: Lyude Paul <lyude@redhat.com> Cc: stable@vger.kernel.org Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2019-07-25 19:40:01 +00:00
ret = num;
out:
nvkm_i2c_aux_release(aux);
drm/nouveau: Don't retry infinitely when receiving no data on i2c over AUX While I had thought I had fixed this issue in: commit 342406e4fbba ("drm/nouveau/i2c: Disable i2c bus access after ->fini()") It turns out that while I did fix the error messages I was seeing on my P50 when trying to access i2c busses with the GPU in runtime suspend, I accidentally had missed one important detail that was mentioned on the bug report this commit was supposed to fix: that the CPU would only lock up when trying to access i2c busses _on connected devices_ _while the GPU is not in runtime suspend_. Whoops. That definitely explains why I was not able to get my machine to hang with i2c bus interactions until now, as plugging my P50 into it's dock with an HDMI monitor connected allowed me to finally reproduce this locally. Now that I have managed to reproduce this issue properly, it looks like the problem is much simpler then it looks. It turns out that some connected devices, such as MST laptop docks, will actually ACK i2c reads even if no data was actually read: [ 275.063043] nouveau 0000:01:00.0: i2c: aux 000a: 1: 0000004c 1 [ 275.063447] nouveau 0000:01:00.0: i2c: aux 000a: 00 01101000 10040000 [ 275.063759] nouveau 0000:01:00.0: i2c: aux 000a: rd 00000001 [ 275.064024] nouveau 0000:01:00.0: i2c: aux 000a: rd 00000000 [ 275.064285] nouveau 0000:01:00.0: i2c: aux 000a: rd 00000000 [ 275.064594] nouveau 0000:01:00.0: i2c: aux 000a: rd 00000000 Because we don't handle the situation of i2c ack without any data, we end up entering an infinite loop in nvkm_i2c_aux_i2c_xfer() since the value of cnt always remains at 0. This finally properly explains how this could result in a CPU hang like the ones observed in the aforementioned commit. So, fix this by retrying transactions if no data is written or received, and give up and fail the transaction if we continue to not write or receive any data after 32 retries. 
Signed-off-by: Lyude Paul <lyude@redhat.com> Cc: stable@vger.kernel.org Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2019-07-25 19:40:01 +00:00
return ret;
}
/*
 * i2c_algorithm.functionality hook: report the I2C_FUNC_* capability mask of
 * the AUX-backed adapter.  @adap is not consulted; the mask is constant.
 */
static u32
nvkm_i2c_aux_i2c_func(struct i2c_adapter *adap)
{
	const u32 caps = I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;

	return caps;
}
/* Algorithm table installed on every AUX-backed i2c adapter by the ctor. */
static const struct i2c_algorithm
nvkm_i2c_aux_i2c_algo = {
	.functionality = nvkm_i2c_aux_i2c_func,
	.master_xfer = nvkm_i2c_aux_i2c_xfer,
};
/*
 * Switch the pad behind @aux into AUX mode when @monitor is true, or turn
 * the pad off when it is false.
 */
void
nvkm_i2c_aux_monitor(struct nvkm_i2c_aux *aux, bool monitor)
{
	struct nvkm_i2c_pad *pad = aux->pad;

	AUX_TRACE(aux, "monitor: %s", monitor ? "yes" : "no");

	nvkm_i2c_pad_mode(pad, monitor ? NVKM_I2C_PAD_AUX : NVKM_I2C_PAD_OFF);
}
/*
 * Undo a successful nvkm_i2c_aux_acquire(): release the pad first, then drop
 * the channel mutex.
 */
void
nvkm_i2c_aux_release(struct nvkm_i2c_aux *aux)
{
	struct nvkm_i2c_pad *owner = aux->pad;

	AUX_TRACE(aux, "release");

	nvkm_i2c_pad_release(owner);
	mutex_unlock(&aux->mutex);
}
int
nvkm_i2c_aux_acquire(struct nvkm_i2c_aux *aux)
{
struct nvkm_i2c_pad *pad = aux->pad;
int ret;
drm/nouveau/i2c: Disable i2c bus access after ->fini() For a while, we've had the problem of i2c bus access not grabbing a runtime PM ref when it's being used in userspace by i2c-dev, resulting in nouveau spamming the kernel log with errors if anything attempts to access the i2c bus while the GPU is in runtime suspend. An example: [ 130.078386] nouveau 0000:01:00.0: i2c: aux 000d: begin idle timeout ffffffff Since the GPU is in runtime suspend, the MMIO region that the i2c bus is on isn't accessible. On x86, the standard behavior for accessing an unavailable MMIO region is to just return ~0. Except, that turned out to be a lie. While computers with a clean concious will return ~0 in this scenario, some machines will actually completely hang a CPU on certian bad MMIO accesses. This was witnessed with someone's Lenovo ThinkPad P50, where sensors-detect attempting to access the i2c bus while the GPU was suspended would result in a CPU hang: CPU: 5 PID: 12438 Comm: sensors-detect Not tainted 5.0.0-0.rc4.git3.1.fc30.x86_64 #1 Hardware name: LENOVO 20EQS64N17/20EQS64N17, BIOS N1EET74W (1.47 ) 11/21/2017 RIP: 0010:ioread32+0x2b/0x30 Code: 81 ff ff ff 03 00 77 20 48 81 ff 00 00 01 00 76 05 0f b7 d7 ed c3 48 c7 c6 e1 0c 36 96 e8 2d ff ff ff b8 ff ff ff ff c3 8b 07 <c3> 0f 1f 40 00 49 89 f0 48 81 fe ff ff 03 00 76 04 40 88 3e c3 48 RSP: 0018:ffffaac3c5007b48 EFLAGS: 00000292 ORIG_RAX: ffffffffffffff13 RAX: 0000000001111000 RBX: 0000000001111000 RCX: 0000043017a97186 RDX: 0000000000000aaa RSI: 0000000000000005 RDI: ffffaac3c400e4e4 RBP: ffff9e6443902c00 R08: ffffaac3c400e4e4 R09: ffffaac3c5007be7 R10: 0000000000000004 R11: 0000000000000001 R12: ffff9e6445dd0000 R13: 000000000000e4e4 R14: 00000000000003c4 R15: 0000000000000000 FS: 00007f253155a740(0000) GS:ffff9e644f600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00005630d1500358 CR3: 0000000417c44006 CR4: 00000000003606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 
0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: g94_i2c_aux_xfer+0x326/0x850 [nouveau] nvkm_i2c_aux_i2c_xfer+0x9e/0x140 [nouveau] __i2c_transfer+0x14b/0x620 i2c_smbus_xfer_emulated+0x159/0x680 ? _raw_spin_unlock_irqrestore+0x1/0x60 ? rt_mutex_slowlock.constprop.0+0x13d/0x1e0 ? __lock_is_held+0x59/0xa0 __i2c_smbus_xfer+0x138/0x5a0 i2c_smbus_xfer+0x4f/0x80 i2cdev_ioctl_smbus+0x162/0x2d0 [i2c_dev] i2cdev_ioctl+0x1db/0x2c0 [i2c_dev] do_vfs_ioctl+0x408/0x750 ksys_ioctl+0x5e/0x90 __x64_sys_ioctl+0x16/0x20 do_syscall_64+0x60/0x1e0 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7f25317f546b Code: 0f 1e fa 48 8b 05 1d da 0c 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 0f 1f 44 00 00 f3 0f 1e fa b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ed d9 0c 00 f7 d8 64 89 01 48 RSP: 002b:00007ffc88caab68 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00005630d0fe7260 RCX: 00007f25317f546b RDX: 00005630d1598e80 RSI: 0000000000000720 RDI: 0000000000000003 RBP: 00005630d155b968 R08: 0000000000000001 R09: 00005630d15a1da0 R10: 0000000000000070 R11: 0000000000000246 R12: 00005630d1598e80 R13: 00005630d12f3d28 R14: 0000000000000720 R15: 00005630d12f3ce0 watchdog: BUG: soft lockup - CPU#5 stuck for 23s! [sensors-detect:12438] Yikes! While I wanted to try to make it so that accessing an i2c bus on nouveau would wake up the GPU as needed, airlied pointed out that pretty much any usecase for userspace accessing an i2c bus on a GPU (mainly for the DDC brightness control that some displays have) is going to only be useful while there's at least one display enabled on the GPU anyway, and the GPU never sleeps while there's displays running. 
Since teaching the i2c bus to wake up the GPU on userspace accesses is a good deal more difficult than it might seem, mostly due to the fact that we have to use the i2c bus during runtime resume of the GPU, we instead opt for the easiest solution: don't let userspace access i2c busses on the GPU at all while it's in runtime suspend. Changes since v1: * Also disable i2c busses that run over DP AUX Signed-off-by: Lyude Paul <lyude@redhat.com> Cc: stable@vger.kernel.org Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2019-04-09 20:23:30 +00:00
AUX_TRACE(aux, "acquire");
mutex_lock(&aux->mutex);
drm/nouveau/i2c: Disable i2c bus access after ->fini() For a while, we've had the problem of i2c bus access not grabbing a runtime PM ref when it's being used in userspace by i2c-dev, resulting in nouveau spamming the kernel log with errors if anything attempts to access the i2c bus while the GPU is in runtime suspend. An example: [ 130.078386] nouveau 0000:01:00.0: i2c: aux 000d: begin idle timeout ffffffff Since the GPU is in runtime suspend, the MMIO region that the i2c bus is on isn't accessible. On x86, the standard behavior for accessing an unavailable MMIO region is to just return ~0. Except, that turned out to be a lie. While computers with a clean concious will return ~0 in this scenario, some machines will actually completely hang a CPU on certian bad MMIO accesses. This was witnessed with someone's Lenovo ThinkPad P50, where sensors-detect attempting to access the i2c bus while the GPU was suspended would result in a CPU hang: CPU: 5 PID: 12438 Comm: sensors-detect Not tainted 5.0.0-0.rc4.git3.1.fc30.x86_64 #1 Hardware name: LENOVO 20EQS64N17/20EQS64N17, BIOS N1EET74W (1.47 ) 11/21/2017 RIP: 0010:ioread32+0x2b/0x30 Code: 81 ff ff ff 03 00 77 20 48 81 ff 00 00 01 00 76 05 0f b7 d7 ed c3 48 c7 c6 e1 0c 36 96 e8 2d ff ff ff b8 ff ff ff ff c3 8b 07 <c3> 0f 1f 40 00 49 89 f0 48 81 fe ff ff 03 00 76 04 40 88 3e c3 48 RSP: 0018:ffffaac3c5007b48 EFLAGS: 00000292 ORIG_RAX: ffffffffffffff13 RAX: 0000000001111000 RBX: 0000000001111000 RCX: 0000043017a97186 RDX: 0000000000000aaa RSI: 0000000000000005 RDI: ffffaac3c400e4e4 RBP: ffff9e6443902c00 R08: ffffaac3c400e4e4 R09: ffffaac3c5007be7 R10: 0000000000000004 R11: 0000000000000001 R12: ffff9e6445dd0000 R13: 000000000000e4e4 R14: 00000000000003c4 R15: 0000000000000000 FS: 00007f253155a740(0000) GS:ffff9e644f600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00005630d1500358 CR3: 0000000417c44006 CR4: 00000000003606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 
0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: g94_i2c_aux_xfer+0x326/0x850 [nouveau] nvkm_i2c_aux_i2c_xfer+0x9e/0x140 [nouveau] __i2c_transfer+0x14b/0x620 i2c_smbus_xfer_emulated+0x159/0x680 ? _raw_spin_unlock_irqrestore+0x1/0x60 ? rt_mutex_slowlock.constprop.0+0x13d/0x1e0 ? __lock_is_held+0x59/0xa0 __i2c_smbus_xfer+0x138/0x5a0 i2c_smbus_xfer+0x4f/0x80 i2cdev_ioctl_smbus+0x162/0x2d0 [i2c_dev] i2cdev_ioctl+0x1db/0x2c0 [i2c_dev] do_vfs_ioctl+0x408/0x750 ksys_ioctl+0x5e/0x90 __x64_sys_ioctl+0x16/0x20 do_syscall_64+0x60/0x1e0 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7f25317f546b Code: 0f 1e fa 48 8b 05 1d da 0c 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 0f 1f 44 00 00 f3 0f 1e fa b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ed d9 0c 00 f7 d8 64 89 01 48 RSP: 002b:00007ffc88caab68 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00005630d0fe7260 RCX: 00007f25317f546b RDX: 00005630d1598e80 RSI: 0000000000000720 RDI: 0000000000000003 RBP: 00005630d155b968 R08: 0000000000000001 R09: 00005630d15a1da0 R10: 0000000000000070 R11: 0000000000000246 R12: 00005630d1598e80 R13: 00005630d12f3d28 R14: 0000000000000720 R15: 00005630d12f3ce0 watchdog: BUG: soft lockup - CPU#5 stuck for 23s! [sensors-detect:12438] Yikes! While I wanted to try to make it so that accessing an i2c bus on nouveau would wake up the GPU as needed, airlied pointed out that pretty much any usecase for userspace accessing an i2c bus on a GPU (mainly for the DDC brightness control that some displays have) is going to only be useful while there's at least one display enabled on the GPU anyway, and the GPU never sleeps while there's displays running. 
Since teaching the i2c bus to wake up the GPU on userspace accesses is a good deal more difficult than it might seem, mostly due to the fact that we have to use the i2c bus during runtime resume of the GPU, we instead opt for the easiest solution: don't let userspace access i2c busses on the GPU at all while it's in runtime suspend. Changes since v1: * Also disable i2c busses that run over DP AUX Signed-off-by: Lyude Paul <lyude@redhat.com> Cc: stable@vger.kernel.org Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2019-04-09 20:23:30 +00:00
if (aux->enabled)
ret = nvkm_i2c_pad_acquire(pad, NVKM_I2C_PAD_AUX);
else
ret = -EIO;
if (ret)
mutex_unlock(&aux->mutex);
return ret;
}
/*
 * Forward a single AUX transaction to the backend ->xfer() hook.
 *
 * A request with *@size == 0 (address-only) is only forwarded when the
 * backend sets func->address_only; otherwise it is logged and rejected with
 * -ENOSYS.  All other arguments are passed through unchanged and the
 * backend's return value is returned as-is.
 */
int
nvkm_i2c_aux_xfer(struct nvkm_i2c_aux *aux, bool retry, u8 type,
		  u32 addr, u8 *data, u8 *size)
{
	if (*size || aux->func->address_only)
		return aux->func->xfer(aux, retry, type, addr, data, size);

	AUX_ERR(aux, "address-only transaction dropped");
	return -ENOSYS;
}
/*
 * Invoke the backend's optional ->lnk_ctl() hook with @nr, @bw and @ef
 * passed through unchanged.  Returns the hook's result, or -ENODEV when the
 * backend does not provide one.
 */
int
nvkm_i2c_aux_lnk_ctl(struct nvkm_i2c_aux *aux, int nr, int bw, bool ef)
{
	if (!aux->func->lnk_ctl)
		return -ENODEV;

	return aux->func->lnk_ctl(aux, nr, bw, ef);
}
/*
 * Destroy the AUX channel referenced by *@paux and reset *@paux to NULL.
 *
 * Nothing happens when *@paux is already NULL.  A channel whose ->func was
 * never set triggers a WARN and is left untouched.
 */
void
nvkm_i2c_aux_del(struct nvkm_i2c_aux **paux)
{
	struct nvkm_i2c_aux *aux = *paux;

	if (!aux || WARN_ON(!aux->func))
		return;

	AUX_TRACE(aux, "dtor");

	/* Unlink from the list and unregister the adapter before freeing. */
	list_del(&aux->head);
	i2c_del_adapter(&aux->i2c);

	kfree(*paux);
	*paux = NULL;
}
drm/nouveau/i2c: Disable i2c bus access after ->fini() For a while, we've had the problem of i2c bus access not grabbing a runtime PM ref when it's being used in userspace by i2c-dev, resulting in nouveau spamming the kernel log with errors if anything attempts to access the i2c bus while the GPU is in runtime suspend. An example: [ 130.078386] nouveau 0000:01:00.0: i2c: aux 000d: begin idle timeout ffffffff Since the GPU is in runtime suspend, the MMIO region that the i2c bus is on isn't accessible. On x86, the standard behavior for accessing an unavailable MMIO region is to just return ~0. Except, that turned out to be a lie. While computers with a clean conscience will return ~0 in this scenario, some machines will actually completely hang a CPU on certain bad MMIO accesses. This was witnessed with someone's Lenovo ThinkPad P50, where sensors-detect attempting to access the i2c bus while the GPU was suspended would result in a CPU hang: CPU: 5 PID: 12438 Comm: sensors-detect Not tainted 5.0.0-0.rc4.git3.1.fc30.x86_64 #1 Hardware name: LENOVO 20EQS64N17/20EQS64N17, BIOS N1EET74W (1.47 ) 11/21/2017 RIP: 0010:ioread32+0x2b/0x30 Code: 81 ff ff ff 03 00 77 20 48 81 ff 00 00 01 00 76 05 0f b7 d7 ed c3 48 c7 c6 e1 0c 36 96 e8 2d ff ff ff b8 ff ff ff ff c3 8b 07 <c3> 0f 1f 40 00 49 89 f0 48 81 fe ff ff 03 00 76 04 40 88 3e c3 48 RSP: 0018:ffffaac3c5007b48 EFLAGS: 00000292 ORIG_RAX: ffffffffffffff13 RAX: 0000000001111000 RBX: 0000000001111000 RCX: 0000043017a97186 RDX: 0000000000000aaa RSI: 0000000000000005 RDI: ffffaac3c400e4e4 RBP: ffff9e6443902c00 R08: ffffaac3c400e4e4 R09: ffffaac3c5007be7 R10: 0000000000000004 R11: 0000000000000001 R12: ffff9e6445dd0000 R13: 000000000000e4e4 R14: 00000000000003c4 R15: 0000000000000000 FS: 00007f253155a740(0000) GS:ffff9e644f600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00005630d1500358 CR3: 0000000417c44006 CR4: 00000000003606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 
0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: g94_i2c_aux_xfer+0x326/0x850 [nouveau] nvkm_i2c_aux_i2c_xfer+0x9e/0x140 [nouveau] __i2c_transfer+0x14b/0x620 i2c_smbus_xfer_emulated+0x159/0x680 ? _raw_spin_unlock_irqrestore+0x1/0x60 ? rt_mutex_slowlock.constprop.0+0x13d/0x1e0 ? __lock_is_held+0x59/0xa0 __i2c_smbus_xfer+0x138/0x5a0 i2c_smbus_xfer+0x4f/0x80 i2cdev_ioctl_smbus+0x162/0x2d0 [i2c_dev] i2cdev_ioctl+0x1db/0x2c0 [i2c_dev] do_vfs_ioctl+0x408/0x750 ksys_ioctl+0x5e/0x90 __x64_sys_ioctl+0x16/0x20 do_syscall_64+0x60/0x1e0 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7f25317f546b Code: 0f 1e fa 48 8b 05 1d da 0c 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 0f 1f 44 00 00 f3 0f 1e fa b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ed d9 0c 00 f7 d8 64 89 01 48 RSP: 002b:00007ffc88caab68 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00005630d0fe7260 RCX: 00007f25317f546b RDX: 00005630d1598e80 RSI: 0000000000000720 RDI: 0000000000000003 RBP: 00005630d155b968 R08: 0000000000000001 R09: 00005630d15a1da0 R10: 0000000000000070 R11: 0000000000000246 R12: 00005630d1598e80 R13: 00005630d12f3d28 R14: 0000000000000720 R15: 00005630d12f3ce0 watchdog: BUG: soft lockup - CPU#5 stuck for 23s! [sensors-detect:12438] Yikes! While I wanted to try to make it so that accessing an i2c bus on nouveau would wake up the GPU as needed, airlied pointed out that pretty much any usecase for userspace accessing an i2c bus on a GPU (mainly for the DDC brightness control that some displays have) is going to only be useful while there's at least one display enabled on the GPU anyway, and the GPU never sleeps while there's displays running. 
Since teaching the i2c bus to wake up the GPU on userspace accesses is a good deal more difficult than it might seem, mostly due to the fact that we have to use the i2c bus during runtime resume of the GPU, we instead opt for the easiest solution: don't let userspace access i2c busses on the GPU at all while it's in runtime suspend. Changes since v1: * Also disable i2c busses that run over DP AUX Signed-off-by: Lyude Paul <lyude@redhat.com> Cc: stable@vger.kernel.org Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
2019-04-09 20:23:30 +00:00
/*
 * Mark the channel as enabled so that nvkm_i2c_aux_acquire() may succeed.
 * The flag is flipped under aux->mutex, the same lock acquire holds while
 * testing it.
 */
void
nvkm_i2c_aux_init(struct nvkm_i2c_aux *aux)
{
	struct mutex *lock = &aux->mutex;

	AUX_TRACE(aux, "init");

	mutex_lock(lock);
	aux->enabled = true;
	mutex_unlock(lock);
}
/*
 * Mark the channel as disabled; subsequent nvkm_i2c_aux_acquire() calls fail
 * with -EIO until nvkm_i2c_aux_init() runs again.  The flag is flipped under
 * aux->mutex.
 */
void
nvkm_i2c_aux_fini(struct nvkm_i2c_aux *aux)
{
	struct mutex *lock = &aux->mutex;

	AUX_TRACE(aux, "fini");

	mutex_lock(lock);
	aux->enabled = false;
	mutex_unlock(lock);
}
int
nvkm_i2c_aux_ctor(const struct nvkm_i2c_aux_func *func,
struct nvkm_i2c_pad *pad, int id,
struct nvkm_i2c_aux *aux)
{
struct nvkm_device *device = pad->i2c->subdev.device;
aux->func = func;
aux->pad = pad;
aux->id = id;
mutex_init(&aux->mutex);
list_add_tail(&aux->head, &pad->i2c->aux);
AUX_TRACE(aux, "ctor");
snprintf(aux->i2c.name, sizeof(aux->i2c.name), "nvkm-%s-aux-%04x",
dev_name(device->dev), id);
aux->i2c.owner = THIS_MODULE;
aux->i2c.dev.parent = device->dev;
aux->i2c.algo = &nvkm_i2c_aux_i2c_algo;
return i2c_add_adapter(&aux->i2c);
}
/*
 * Allocate a zeroed AUX channel, store it in *@paux and construct it with
 * nvkm_i2c_aux_ctor().
 *
 * Returns -ENOMEM (with *@paux set to NULL) when the allocation fails,
 * otherwise the constructor's result.  *@paux remains set when the
 * constructor fails.
 */
int
nvkm_i2c_aux_new_(const struct nvkm_i2c_aux_func *func,
		  struct nvkm_i2c_pad *pad, int id,
		  struct nvkm_i2c_aux **paux)
{
	struct nvkm_i2c_aux *aux = kzalloc(sizeof(*aux), GFP_KERNEL);

	*paux = aux;
	if (!aux)
		return -ENOMEM;

	return nvkm_i2c_aux_ctor(func, pad, id, aux);
}