diff --git a/.gitignore b/.gitignore index 5d56a3fd0de6..9dacde0a4b2d 100644 --- a/.gitignore +++ b/.gitignore @@ -57,6 +57,7 @@ modules.builtin include/config include/linux/version.h include/generated +arch/*/include/generated # stgit generated dirs patches-* diff --git a/.mailmap b/.mailmap index 5a6dd592eedc..353ad5607156 100644 --- a/.mailmap +++ b/.mailmap @@ -32,6 +32,7 @@ Brian Avery Brian King Christoph Hellwig Corey Minyard +Damian Hobson-Garcia David Brownell David Woodhouse Dmitry Eremin-Solenikov diff --git a/CREDITS b/CREDITS index dca6abcead6b..d78359f5f64d 100644 --- a/CREDITS +++ b/CREDITS @@ -328,7 +328,7 @@ S: Haifa, Israel N: Johannes Berg E: johannes@sipsolutions.net W: http://johannes.sipsolutions.net/ -P: 1024D/9AB78CA5 AD02 0176 4E29 C137 1DF6 08D2 FC44 CF86 9AB7 8CA5 +P: 4096R/7BF9099A C0EB C440 F6DA 091C 884D 8532 E0F3 73F3 7BF9 099A D: powerpc & 802.11 hacker N: Stephen R. van den Berg (AKA BuGless) @@ -518,6 +518,14 @@ N: Zach Brown E: zab@zabbo.net D: maestro pci sound +M: David Brownell +D: Kernel engineer, mentor, and friend. Maintained USB EHCI and +D: gadget layers, SPI subsystem, GPIO subsystem, and more than a few +D: device drivers. His encouragement also helped many engineers get +D: started working on the Linux kernel. David passed away in early +D: 2011, and will be greatly missed. +W: https://lkml.org/lkml/2011/4/5/36 + N: Gary Brubaker E: xavyer@ix.netcom.com D: USB Serial Empeg Empeg-car Mark I/II Driver @@ -2943,6 +2951,10 @@ S: Kasarmikatu 11 A4 S: 70110 Kuopio S: Finland +N: Tobias Ringström +E: tori@unhappy.mine.nu +D: Davicom DM9102(A)/DM9132/DM9801 fast ethernet driver + N: Luca Risolia E: luca.risolia@studio.unibo.it P: 1024D/FCE635A4 88E8 F32F 7244 68BA 3958 5D40 99DA 5D2A FCE6 35A4 @@ -3913,6 +3925,10 @@ S: Flandernstrasse 101 S: D-73732 Esslingen S: Germany +N: Roman Zippel +E: zippel@linux-m68k.org +D: AFFS and HFS filesystems, m68k maintainer, new kernel configuration in 2.5 + N: Leonard N. Zubkoff W: http://www.dandelion.com/Linux/ D: BusLogic SCSI driver diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX index c17cd4bb2290..1f89424c36a6 100644 --- a/Documentation/00-INDEX +++ b/Documentation/00-INDEX @@ -192,10 +192,6 @@ kernel-docs.txt - listing of various WWW + books that document kernel internals. kernel-parameters.txt - summary listing of command line / boot prompt args for the kernel. -keys-request-key.txt - - description of the kernel key request service. -keys.txt - - description of the kernel key retention service. kobject.txt - info of the kobject infrastructure of the Linux kernel. kprobes.txt @@ -294,6 +290,8 @@ scheduler/ - directory with info on the scheduler. scsi/ - directory with info on Linux scsi support. +security/ + - directory that contains security-related info serial/ - directory with info on the low level serial API. serial-console.txt @@ -328,8 +326,6 @@ sysrq.txt - info on the magic SysRq key. telephony/ - directory with info on telephony (e.g. voice over IP) support. -uml/ - - directory with information about User Mode Linux. unicode.txt - info on the Unicode character/font mapping used in Linux. unshare.txt diff --git a/Documentation/ABI/obsolete/o2cb b/Documentation/ABI/obsolete/o2cb deleted file mode 100644 index 9c49d8e6c0cc..000000000000 --- a/Documentation/ABI/obsolete/o2cb +++ /dev/null @@ -1,11 +0,0 @@ -What: /sys/o2cb symlink -Date: Dec 2005 -KernelVersion: 2.6.16 -Contact: ocfs2-devel@oss.oracle.com -Description: This is a symlink: /sys/o2cb to /sys/fs/o2cb. The symlink will - be removed when new versions of ocfs2-tools which know to look - in /sys/fs/o2cb are sufficiently prevalent. Don't code new - software to look here, it should try /sys/fs/o2cb instead. - See Documentation/ABI/stable/o2cb for more information on usage. -Users: ocfs2-tools. It's sufficient to mail proposed changes to - ocfs2-devel@oss.oracle.com. diff --git a/Documentation/ABI/obsolete/sysfs-driver-hid-roccat-koneplus b/Documentation/ABI/obsolete/sysfs-driver-hid-roccat-koneplus new file mode 100644 index 000000000000..c2a270b45b03 --- /dev/null +++ b/Documentation/ABI/obsolete/sysfs-driver-hid-roccat-koneplus @@ -0,0 +1,10 @@ +What: /sys/bus/usb/devices/-:./::./koneplus/roccatkoneplus/startup_profile +Date: October 2010 +Contact: Stefan Achatz +Description: The integer value of this attribute ranges from 0-4. + When read, this attribute returns the number of the actual + profile. This value is persistent, so its equivalent to the + profile that's active when the mouse is powered on next time. + When written, this file sets the number of the startup profile + and the mouse activates this profile immediately. + Please use actual_profile, it does the same thing. diff --git a/Documentation/ABI/removed/o2cb b/Documentation/ABI/removed/o2cb new file mode 100644 index 000000000000..7f5daa465093 --- /dev/null +++ b/Documentation/ABI/removed/o2cb @@ -0,0 +1,10 @@ +What: /sys/o2cb symlink +Date: May 2011 +KernelVersion: 2.6.40 +Contact: ocfs2-devel@oss.oracle.com +Description: This is a symlink: /sys/o2cb to /sys/fs/o2cb. The symlink is + removed when new versions of ocfs2-tools which know to look + in /sys/fs/o2cb are sufficiently prevalent. Don't code new + software to look here, it should try /sys/fs/o2cb instead. +Users: ocfs2-tools. It's sufficient to mail proposed changes to + ocfs2-devel@oss.oracle.com. diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block index 4873c759d535..c1eb41cb9876 100644 --- a/Documentation/ABI/testing/sysfs-block +++ b/Documentation/ABI/testing/sysfs-block @@ -142,3 +142,67 @@ Description: with the previous I/O request are enabled. When set to 2, all merge tries are disabled. The default value is 0 - which enables all types of merge tries. + +What: /sys/block//discard_alignment +Date: May 2011 +Contact: Martin K. Petersen +Description: + Devices that support discard functionality may + internally allocate space in units that are bigger than + the exported logical block size. The discard_alignment + parameter indicates how many bytes the beginning of the + device is offset from the internal allocation unit's + natural alignment. + +What: /sys/block///discard_alignment +Date: May 2011 +Contact: Martin K. Petersen +Description: + Devices that support discard functionality may + internally allocate space in units that are bigger than + the exported logical block size. The discard_alignment + parameter indicates how many bytes the beginning of the + partition is offset from the internal allocation unit's + natural alignment. + +What: /sys/block//queue/discard_granularity +Date: May 2011 +Contact: Martin K. Petersen +Description: + Devices that support discard functionality may + internally allocate space using units that are bigger + than the logical block size. The discard_granularity + parameter indicates the size of the internal allocation + unit in bytes if reported by the device. Otherwise the + discard_granularity will be set to match the device's + physical block size. A discard_granularity of 0 means + that the device does not support discard functionality. + +What: /sys/block//queue/discard_max_bytes +Date: May 2011 +Contact: Martin K. Petersen +Description: + Devices that support discard functionality may have + internal limits on the number of bytes that can be + trimmed or unmapped in a single operation. Some storage + protocols also have inherent limits on the number of + blocks that can be described in a single command. The + discard_max_bytes parameter is set by the device driver + to the maximum number of bytes that can be discarded in + a single operation. Discard requests issued to the + device must not exceed this limit. A discard_max_bytes + value of 0 means that the device does not support + discard functionality. + +What: /sys/block//queue/discard_zeroes_data +Date: May 2011 +Contact: Martin K. Petersen +Description: + Devices that support discard functionality may return + stale or random data when a previously discarded block + is read back. This can cause problems if the filesystem + expects discarded blocks to be explicitly cleared. If a + device reports that it deterministically returns zeroes + when a discarded area is read the discard_zeroes_data + parameter will be set to one. Otherwise it will be 0 and + the result of reading a discarded area is undefined. diff --git a/Documentation/ABI/testing/sysfs-bus-bcma b/Documentation/ABI/testing/sysfs-bus-bcma new file mode 100644 index 000000000000..06b62badddd1 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-bcma @@ -0,0 +1,31 @@ +What: /sys/bus/bcma/devices/.../manuf +Date: May 2011 +KernelVersion: 2.6.40 +Contact: Rafał Miłecki +Description: + Each BCMA core has it's manufacturer id. See + include/linux/bcma/bcma.h for possible values. + +What: /sys/bus/bcma/devices/.../id +Date: May 2011 +KernelVersion: 2.6.40 +Contact: Rafał Miłecki +Description: + There are a few types of BCMA cores, they can be identified by + id field. + +What: /sys/bus/bcma/devices/.../rev +Date: May 2011 +KernelVersion: 2.6.40 +Contact: Rafał Miłecki +Description: + BCMA cores of the same type can still slightly differ depending + on their revision. Use it for detailed programming. + +What: /sys/bus/bcma/devices/.../class +Date: May 2011 +KernelVersion: 2.6.40 +Contact: Rafał Miłecki +Description: + Each BCMA core is identified by few fields, including class it + belongs to. See include/linux/bcma/bcma.h for possible values. diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci index 36bf454ba855..349ecf26ce10 100644 --- a/Documentation/ABI/testing/sysfs-bus-pci +++ b/Documentation/ABI/testing/sysfs-bus-pci @@ -74,6 +74,15 @@ Description: hot-remove the PCI device and any of its children. Depends on CONFIG_HOTPLUG. +What: /sys/bus/pci/devices/.../pci_bus/.../rescan +Date: May 2011 +Contact: Linux PCI developers +Description: + Writing a non-zero value to this attribute will + force a rescan of the bus and all child buses, + and re-discover devices removed earlier from this + part of the device tree. Depends on CONFIG_HOTPLUG. + What: /sys/bus/pci/devices/.../rescan Date: January 2009 Contact: Linux PCI developers diff --git a/Documentation/ABI/testing/sysfs-class-backlight-driver-adp8870 b/Documentation/ABI/testing/sysfs-class-backlight-driver-adp8870 new file mode 100644 index 000000000000..aa11dbdd794b --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-backlight-driver-adp8870 @@ -0,0 +1,56 @@ +What: /sys/class/backlight//_max +What: /sys/class/backlight//l1_daylight_max +What: /sys/class/backlight//l2_bright_max +What: /sys/class/backlight//l3_office_max +What: /sys/class/backlight//l4_indoor_max +What: /sys/class/backlight//l5_dark_max +Date: Mai 2011 +KernelVersion: 2.6.40 +Contact: device-drivers-devel@blackfin.uclinux.org +Description: + Control the maximum brightness for + on this . Values are between 0 and 127. This file + will also show the brightness level stored for this + . + +What: /sys/class/backlight//_dim +What: /sys/class/backlight//l2_bright_dim +What: /sys/class/backlight//l3_office_dim +What: /sys/class/backlight//l4_indoor_dim +What: /sys/class/backlight//l5_dark_dim +Date: Mai 2011 +KernelVersion: 2.6.40 +Contact: device-drivers-devel@blackfin.uclinux.org +Description: + Control the dim brightness for + on this . Values are between 0 and 127, typically + set to 0. Full off when the backlight is disabled. + This file will also show the dim brightness level stored for + this . + +What: /sys/class/backlight//ambient_light_level +Date: Mai 2011 +KernelVersion: 2.6.40 +Contact: device-drivers-devel@blackfin.uclinux.org +Description: + Get conversion value of the light sensor. + This value is updated every 80 ms (when the light sensor + is enabled). Returns integer between 0 (dark) and + 8000 (max ambient brightness) + +What: /sys/class/backlight//ambient_light_zone +Date: Mai 2011 +KernelVersion: 2.6.40 +Contact: device-drivers-devel@blackfin.uclinux.org +Description: + Get/Set current ambient light zone. Reading returns + integer between 1..5 (1 = daylight, 2 = bright, ..., 5 = dark). + Writing a value between 1..5 forces the backlight controller + to enter the corresponding ambient light zone. + Writing 0 returns to normal/automatic ambient light level + operation. The ambient light sensing feature on these devices + is an extension to the API documented in + Documentation/ABI/stable/sysfs-class-backlight. + It can be enabled by writing the value stored in + /sys/class/backlight//max_brightness to + /sys/class/backlight//brightness. \ No newline at end of file diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 7564e88bfa43..e7be75b96e4b 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -183,21 +183,21 @@ Description: Discover and change clock speed of CPUs to learn how to control the knobs. -What: /sys/devices/system/cpu/cpu*/cache/index*/cache_disable_X -Date: August 2008 +What: /sys/devices/system/cpu/cpu*/cache/index3/cache_disable_{0,1} +Date: August 2008 KernelVersion: 2.6.27 -Contact: mark.langsdorf@amd.com -Description: These files exist in every cpu's cache index directories. - There are currently 2 cache_disable_# files in each - directory. Reading from these files on a supported - processor will return that cache disable index value - for that processor and node. Writing to one of these - files will cause the specificed cache index to be disabled. +Contact: discuss@x86-64.org +Description: Disable L3 cache indices - Currently, only AMD Family 10h Processors support cache index - disable, and only for their L3 caches. See the BIOS and - Kernel Developer's Guide at - http://support.amd.com/us/Embedded_TechDocs/31116-Public-GH-BKDG_3-28_5-28-09.pdf - for formatting information and other details on the - cache index disable. -Users: joachim.deguara@amd.com + These files exist in every CPU's cache/index3 directory. Each + cache_disable_{0,1} file corresponds to one disable slot which + can be used to disable a cache index. Reading from these files + on a processor with this functionality will return the currently + disabled index for that node. There is one L3 structure per + node, or per internal node on MCM machines. Writing a valid + index to one of these files will cause the specificed cache + index to be disabled. + + All AMD processors with L3 caches provide this functionality. + For details, see BKDGs at + http://developer.amd.com/documentation/guides/Pages/default.aspx diff --git a/Documentation/ABI/testing/sysfs-driver-hid-roccat-koneplus b/Documentation/ABI/testing/sysfs-driver-hid-roccat-koneplus index 326e05452da7..c1b53b8bc2ae 100644 --- a/Documentation/ABI/testing/sysfs-driver-hid-roccat-koneplus +++ b/Documentation/ABI/testing/sysfs-driver-hid-roccat-koneplus @@ -1,9 +1,12 @@ What: /sys/bus/usb/devices/-:./::./koneplus/roccatkoneplus/actual_profile Date: October 2010 Contact: Stefan Achatz -Description: When read, this file returns the number of the actual profile in - range 0-4. - This file is readonly. +Description: The integer value of this attribute ranges from 0-4. + When read, this attribute returns the number of the actual + profile. This value is persistent, so its equivalent to the + profile that's active when the mouse is powered on next time. + When written, this file sets the number of the startup profile + and the mouse activates this profile immediately. Users: http://roccat.sourceforge.net What: /sys/bus/usb/devices/-:./::./koneplus/roccatkoneplus/firmware_version @@ -89,16 +92,6 @@ Description: The mouse has a tracking- and a distance-control-unit. These This file is writeonly. Users: http://roccat.sourceforge.net -What: /sys/bus/usb/devices/-:./::./koneplus/roccatkoneplus/startup_profile -Date: October 2010 -Contact: Stefan Achatz -Description: The integer value of this attribute ranges from 0-4. - When read, this attribute returns the number of the profile - that's active when the mouse is powered on. - When written, this file sets the number of the startup profile - and the mouse activates this profile immediately. -Users: http://roccat.sourceforge.net - What: /sys/bus/usb/devices/-:./::./koneplus/roccatkoneplus/tcu Date: October 2010 Contact: Stefan Achatz diff --git a/Documentation/ABI/testing/sysfs-firmware-dmi b/Documentation/ABI/testing/sysfs-firmware-dmi index ba9da9503c23..c78f9ab01e56 100644 --- a/Documentation/ABI/testing/sysfs-firmware-dmi +++ b/Documentation/ABI/testing/sysfs-firmware-dmi @@ -14,14 +14,15 @@ Description: DMI is structured as a large table of entries, where each entry has a common header indicating the type and - length of the entry, as well as 'handle' that is - supposed to be unique amongst all entries. + length of the entry, as well as a firmware-provided + 'handle' that is supposed to be unique amongst all + entries. Some entries are required by the specification, but many others are optional. In general though, users should never expect to find a specific entry type on their system unless they know for certain what their firmware - is doing. Machine to machine will vary. + is doing. Machine to machine experiences will vary. Multiple entries of the same type are allowed. In order to handle these duplicate entry types, each entry is @@ -67,25 +68,24 @@ Description: and the two terminating nul characters. type : The type of the entry. This value is the same as found in the directory name. It indicates - how the rest of the entry should be - interpreted. + how the rest of the entry should be interpreted. instance: The instance ordinal of the entry for the given type. This value is the same as found in the parent directory name. - position: The position of the entry within the entirety - of the entirety. + position: The ordinal position (zero-based) of the entry + within the entirety of the DMI entry table. === Entry Specialization === Some entry types may have other information available in - sysfs. + sysfs. Not all types are specialized. --- Type 15 - System Event Log --- This entry allows the firmware to export a log of events the system has taken. This information is typically backed by nvram, but the implementation - details are abstracted by this table. This entries data + details are abstracted by this table. This entry's data is exported in the directory: /sys/firmware/dmi/entries/15-0/system_event_log diff --git a/Documentation/ABI/testing/sysfs-firmware-gsmi b/Documentation/ABI/testing/sysfs-firmware-gsmi new file mode 100644 index 000000000000..0faa0aaf4b6a --- /dev/null +++ b/Documentation/ABI/testing/sysfs-firmware-gsmi @@ -0,0 +1,58 @@ +What: /sys/firmware/gsmi +Date: March 2011 +Contact: Mike Waychison +Description: + Some servers used internally at Google have firmware + that provides callback functionality via explicit SMI + triggers. Some of the callbacks are similar to those + provided by the EFI runtime services page, but due to + historical reasons this different entry-point has been + used. + + The gsmi driver implements the kernel's abstraction for + these firmware callbacks. Currently, this functionality + is limited to handling the system event log and getting + access to EFI-style variables stored in nvram. + + Layout: + + /sys/firmware/gsmi/vars: + + This directory has the same layout (and + underlying implementation as /sys/firmware/efi/vars. + See Documentation/ABI/*/sysfs-firmware-efi-vars + for more information on how to interact with + this structure. + + /sys/firmware/gsmi/append_to_eventlog - write-only: + + This file takes a binary blob and passes it onto + the firmware to be timestamped and appended to + the system eventlog. The binary format is + interpreted by the firmware and may change from + platform to platform. The only kernel-enforced + requirement is that the blob be prefixed with a + 32bit host-endian type used as part of the + firmware call. + + /sys/firmware/gsmi/clear_config - write-only: + + Writing any value to this file will cause the + entire firmware configuration to be reset to + "factory defaults". Callers should assume that + a reboot is required for the configuration to be + cleared. + + /sys/firmware/gsmi/clear_eventlog - write-only: + + This file is used to clear out a portion/the + whole of the system event log. Values written + should be values between 1 and 100 inclusive (in + ASCII) representing the fraction of the log to + clear. Not all platforms support fractional + clearing though, and this writes to this file + will error out if the firmware doesn't like your + submitted fraction. + + Callers should assume that a reboot is needed + for this operation to complete. diff --git a/Documentation/ABI/testing/sysfs-firmware-log b/Documentation/ABI/testing/sysfs-firmware-log new file mode 100644 index 000000000000..9b58e7c5365f --- /dev/null +++ b/Documentation/ABI/testing/sysfs-firmware-log @@ -0,0 +1,7 @@ +What: /sys/firmware/log +Date: February 2011 +Contact: Mike Waychison +Description: + The /sys/firmware/log is a binary file that represents a + read-only copy of the firmware's log if one is + available. diff --git a/Documentation/ABI/testing/sysfs-kernel-fscaps b/Documentation/ABI/testing/sysfs-kernel-fscaps new file mode 100644 index 000000000000..50a3033b5e15 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-fscaps @@ -0,0 +1,8 @@ +What: /sys/kernel/fscaps +Date: February 2011 +KernelVersion: 2.6.38 +Contact: Ludwig Nussel +Description + Shows whether file system capabilities are honored + when executing a binary + diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-cleancache b/Documentation/ABI/testing/sysfs-kernel-mm-cleancache new file mode 100644 index 000000000000..662ae646ea12 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-mm-cleancache @@ -0,0 +1,11 @@ +What: /sys/kernel/mm/cleancache/ +Date: April 2011 +Contact: Dan Magenheimer +Description: + /sys/kernel/mm/cleancache/ contains a number of files which + record a count of various cleancache operations + (sum across all filesystems): + succ_gets + failed_gets + puts + flushes diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power index 194ca446ac28..b464d12761ba 100644 --- a/Documentation/ABI/testing/sysfs-power +++ b/Documentation/ABI/testing/sysfs-power @@ -158,3 +158,17 @@ Description: successful, will make the kernel abort a subsequent transition to a sleep state if any wakeup events are reported after the write has returned. + +What: /sys/power/reserved_size +Date: May 2011 +Contact: Rafael J. Wysocki +Description: + The /sys/power/reserved_size file allows user space to control + the amount of memory reserved for allocations made by device + drivers during the "device freeze" stage of hibernation. It can + be written a string representing a non-negative integer that + will be used as the amount of memory to reserve for allocations + made by device drivers' "freeze" callbacks, in bytes. + + Reading from this file will display the current value, which is + set to 1 MB by default. diff --git a/Documentation/ABI/testing/sysfs-ptp b/Documentation/ABI/testing/sysfs-ptp new file mode 100644 index 000000000000..d40d2b550502 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-ptp @@ -0,0 +1,98 @@ +What: /sys/class/ptp/ +Date: September 2010 +Contact: Richard Cochran +Description: + This directory contains files and directories + providing a standardized interface to the ancillary + features of PTP hardware clocks. + +What: /sys/class/ptp/ptpN/ +Date: September 2010 +Contact: Richard Cochran +Description: + This directory contains the attributes of the Nth PTP + hardware clock registered into the PTP class driver + subsystem. + +What: /sys/class/ptp/ptpN/clock_name +Date: September 2010 +Contact: Richard Cochran +Description: + This file contains the name of the PTP hardware clock + as a human readable string. + +What: /sys/class/ptp/ptpN/max_adjustment +Date: September 2010 +Contact: Richard Cochran +Description: + This file contains the PTP hardware clock's maximum + frequency adjustment value (a positive integer) in + parts per billion. + +What: /sys/class/ptp/ptpN/n_alarms +Date: September 2010 +Contact: Richard Cochran +Description: + This file contains the number of periodic or one shot + alarms offer by the PTP hardware clock. + +What: /sys/class/ptp/ptpN/n_external_timestamps +Date: September 2010 +Contact: Richard Cochran +Description: + This file contains the number of external timestamp + channels offered by the PTP hardware clock. + +What: /sys/class/ptp/ptpN/n_periodic_outputs +Date: September 2010 +Contact: Richard Cochran +Description: + This file contains the number of programmable periodic + output channels offered by the PTP hardware clock. + +What: /sys/class/ptp/ptpN/pps_avaiable +Date: September 2010 +Contact: Richard Cochran +Description: + This file indicates whether the PTP hardware clock + supports a Pulse Per Second to the host CPU. Reading + "1" means that the PPS is supported, while "0" means + not supported. + +What: /sys/class/ptp/ptpN/extts_enable +Date: September 2010 +Contact: Richard Cochran +Description: + This write-only file enables or disables external + timestamps. To enable external timestamps, write the + channel index followed by a "1" into the file. + To disable external timestamps, write the channel + index followed by a "0" into the file. + +What: /sys/class/ptp/ptpN/fifo +Date: September 2010 +Contact: Richard Cochran +Description: + This file provides timestamps on external events, in + the form of three integers: channel index, seconds, + and nanoseconds. + +What: /sys/class/ptp/ptpN/period +Date: September 2010 +Contact: Richard Cochran +Description: + This write-only file enables or disables periodic + outputs. To enable a periodic output, write five + integers into the file: channel index, start time + seconds, start time nanoseconds, period seconds, and + period nanoseconds. To disable a periodic output, set + all the seconds and nanoseconds values to zero. + +What: /sys/class/ptp/ptpN/pps_enable +Date: September 2010 +Contact: Richard Cochran +Description: + This write-only file enables or disables delivery of + PPS events to the Linux PPS subsystem. To enable PPS + events, write a "1" into the file. To disable events, + write a "0" into the file. diff --git a/Documentation/DocBook/.gitignore b/Documentation/DocBook/.gitignore index c6def352fe39..679034cbd686 100644 --- a/Documentation/DocBook/.gitignore +++ b/Documentation/DocBook/.gitignore @@ -8,3 +8,4 @@ *.dvi *.log *.out +media/ diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile index 8436b018c289..3cebfa0d1611 100644 --- a/Documentation/DocBook/Makefile +++ b/Documentation/DocBook/Makefile @@ -73,7 +73,7 @@ installmandocs: mandocs ### #External programs used KERNELDOC = $(srctree)/scripts/kernel-doc -DOCPROC = $(objtree)/scripts/basic/docproc +DOCPROC = $(objtree)/scripts/docproc XMLTOFLAGS = -m $(srctree)/Documentation/DocBook/stylesheet.xsl XMLTOFLAGS += --skip-validation diff --git a/Documentation/DocBook/device-drivers.tmpl b/Documentation/DocBook/device-drivers.tmpl index 36f63d4a0a06..b638e50cf8f6 100644 --- a/Documentation/DocBook/device-drivers.tmpl +++ b/Documentation/DocBook/device-drivers.tmpl @@ -96,10 +96,10 @@ X!Iinclude/linux/kobject.h Device drivers infrastructure + The Basic Device Driver-Model Structures +!Iinclude/linux/device.h + Device Drivers Base - !Edrivers/base/driver.c !Edrivers/base/core.c !Edrivers/base/class.c diff --git a/Documentation/DocBook/dvb/dvbapi.xml b/Documentation/DocBook/dvb/dvbapi.xml index ad8678d48916..9fad86ce7f5e 100644 --- a/Documentation/DocBook/dvb/dvbapi.xml +++ b/Documentation/DocBook/dvb/dvbapi.xml @@ -34,6 +34,14 @@ + + 2.0.4 + 2011-05-06 + mcc + + Add more information about DVB APIv5, better describing the frontend GET/SET props ioctl's. + + 2.0.3 2010-07-03 diff --git a/Documentation/DocBook/dvb/dvbproperty.xml b/Documentation/DocBook/dvb/dvbproperty.xml index 97f397e2fb3a..b5365f61d69b 100644 --- a/Documentation/DocBook/dvb/dvbproperty.xml +++ b/Documentation/DocBook/dvb/dvbproperty.xml @@ -1,6 +1,330 @@ -
+
FE_GET_PROPERTY/FE_SET_PROPERTY + +/* Reserved fields should be set to 0 */ +struct dtv_property { + __u32 cmd; + union { + __u32 data; + struct { + __u8 data[32]; + __u32 len; + __u32 reserved1[3]; + void *reserved2; + } buffer; + } u; + int result; +} __attribute__ ((packed)); + +/* num of properties cannot exceed DTV_IOCTL_MAX_MSGS per ioctl */ +#define DTV_IOCTL_MAX_MSGS 64 + +struct dtv_properties { + __u32 num; + struct dtv_property *props; +}; + + +
+FE_GET_PROPERTY +DESCRIPTION + + +This ioctl call returns one or more frontend properties. This call only + requires read-only access to the device. + + +SYNOPSIS + + +int ioctl(int fd, int request = FE_GET_PROPERTY, + dtv_properties ⋆props); + + +PARAMETERS + + +int fd + +File descriptor returned by a previous call to open(). + + +int num + +Equals FE_GET_PROPERTY for this command. + + +struct dtv_property *props + +Points to the location where the front-end property commands are stored. + + +ERRORS + + EINVAL + Invalid parameter(s) received or number of parameters out of the range. + + ENOMEM + Out of memory. + + EFAULT + Failure while copying data from/to userspace. + + EOPNOTSUPP + Property type not supported. + +
+ +
+FE_SET_PROPERTY +DESCRIPTION + + +This ioctl call sets one or more frontend properties. This call only + requires read-only access to the device. + + +SYNOPSIS + + +int ioctl(int fd, int request = FE_SET_PROPERTY, + dtv_properties ⋆props); + + +PARAMETERS + + +int fd + +File descriptor returned by a previous call to open(). + + +int num + +Equals FE_SET_PROPERTY for this command. + + +struct dtv_property *props + +Points to the location where the front-end property commands are stored. + + +ERRORS + + + EINVAL + Invalid parameter(s) received or number of parameters out of the range. + + ENOMEM + Out of memory. + + EFAULT + Failure while copying data from/to userspace. + + EOPNOTSUPP + Property type not supported. + +
+ +
+ Property types + +On FE_GET_PROPERTY/FE_SET_PROPERTY, +the actual action is determined by the dtv_property cmd/data pairs. With one single ioctl, is possible to +get/set up to 64 properties. The actual meaning of each property is described on the next sections. + + +The available frontend property types are: + +#define DTV_UNDEFINED 0 +#define DTV_TUNE 1 +#define DTV_CLEAR 2 +#define DTV_FREQUENCY 3 +#define DTV_MODULATION 4 +#define DTV_BANDWIDTH_HZ 5 +#define DTV_INVERSION 6 +#define DTV_DISEQC_MASTER 7 +#define DTV_SYMBOL_RATE 8 +#define DTV_INNER_FEC 9 +#define DTV_VOLTAGE 10 +#define DTV_TONE 11 +#define DTV_PILOT 12 +#define DTV_ROLLOFF 13 +#define DTV_DISEQC_SLAVE_REPLY 14 +#define DTV_FE_CAPABILITY_COUNT 15 +#define DTV_FE_CAPABILITY 16 +#define DTV_DELIVERY_SYSTEM 17 +#define DTV_ISDBT_PARTIAL_RECEPTION 18 +#define DTV_ISDBT_SOUND_BROADCASTING 19 +#define DTV_ISDBT_SB_SUBCHANNEL_ID 20 +#define DTV_ISDBT_SB_SEGMENT_IDX 21 +#define DTV_ISDBT_SB_SEGMENT_COUNT 22 +#define DTV_ISDBT_LAYERA_FEC 23 +#define DTV_ISDBT_LAYERA_MODULATION 24 +#define DTV_ISDBT_LAYERA_SEGMENT_COUNT 25 +#define DTV_ISDBT_LAYERA_TIME_INTERLEAVING 26 +#define DTV_ISDBT_LAYERB_FEC 27 +#define DTV_ISDBT_LAYERB_MODULATION 28 +#define DTV_ISDBT_LAYERB_SEGMENT_COUNT 29 +#define DTV_ISDBT_LAYERB_TIME_INTERLEAVING 30 +#define DTV_ISDBT_LAYERC_FEC 31 +#define DTV_ISDBT_LAYERC_MODULATION 32 +#define DTV_ISDBT_LAYERC_SEGMENT_COUNT 33 +#define DTV_ISDBT_LAYERC_TIME_INTERLEAVING 34 +#define DTV_API_VERSION 35 +#define DTV_CODE_RATE_HP 36 +#define DTV_CODE_RATE_LP 37 +#define DTV_GUARD_INTERVAL 38 +#define DTV_TRANSMISSION_MODE 39 +#define DTV_HIERARCHY 40 +#define DTV_ISDBT_LAYER_ENABLED 41 +#define DTV_ISDBS_TS_ID 42 + +
+ +
+ Parameters that are common to all Digital TV standards +
+ <constant>DTV_FREQUENCY</constant> + + Central frequency of the channel, in HZ. + + Notes: + 1)For ISDB-T, the channels are usually transmitted with an offset of 143kHz. + E.g. a valid frequncy could be 474143 kHz. The stepping is bound to the bandwidth of + the channel which is 6MHz. + + 2)As in ISDB-Tsb the channel consists of only one or three segments the + frequency step is 429kHz, 3*429 respectively. As for ISDB-T the + central frequency of the channel is expected. +
+ +
+ <constant>DTV_BANDWIDTH_HZ</constant> + + Bandwidth for the channel, in HZ. + + Possible values: + 1712000, + 5000000, + 6000000, + 7000000, + 8000000, + 10000000. + + + Notes: + + 1) For ISDB-T it should be always 6000000Hz (6MHz) + 2) For ISDB-Tsb it can vary depending on the number of connected segments + 3) Bandwidth doesn't apply for DVB-C transmissions, as the bandwidth + for DVB-C depends on the symbol rate + 4) Bandwidth in ISDB-T is fixed (6MHz) or can be easily derived from + other parameters (DTV_ISDBT_SB_SEGMENT_IDX, + DTV_ISDBT_SB_SEGMENT_COUNT). + 5) DVB-T supports 6, 7 and 8MHz. + 6) In addition, DVB-T2 supports 1.172, 5 and 10MHz. +
+ +
+ <constant>DTV_DELIVERY_SYSTEM</constant> + + Specifies the type of Delivery system + + Possible values: + +typedef enum fe_delivery_system { + SYS_UNDEFINED, + SYS_DVBC_ANNEX_AC, + SYS_DVBC_ANNEX_B, + SYS_DVBT, + SYS_DSS, + SYS_DVBS, + SYS_DVBS2, + SYS_DVBH, + SYS_ISDBT, + SYS_ISDBS, + SYS_ISDBC, + SYS_ATSC, + SYS_ATSCMH, + SYS_DMBTH, + SYS_CMMB, + SYS_DAB, + SYS_DVBT2, +} fe_delivery_system_t; + + +
+ +
+ <constant>DTV_TRANSMISSION_MODE</constant> + + Specifies the number of carriers used by the standard + + Possible values are: + +typedef enum fe_transmit_mode { + TRANSMISSION_MODE_2K, + TRANSMISSION_MODE_8K, + TRANSMISSION_MODE_AUTO, + TRANSMISSION_MODE_4K, + TRANSMISSION_MODE_1K, + TRANSMISSION_MODE_16K, + TRANSMISSION_MODE_32K, +} fe_transmit_mode_t; + + + Notes: + 1) ISDB-T supports three carrier/symbol-size: 8K, 4K, 2K. It is called + 'mode' in the standard: Mode 1 is 2K, mode 2 is 4K, mode 3 is 8K + + 2) If DTV_TRANSMISSION_MODE is set the TRANSMISSION_MODE_AUTO the + hardware will try to find the correct FFT-size (if capable) and will + use TMCC to fill in the missing parameters. + 3) DVB-T specifies 2K and 8K as valid sizes. + 4) DVB-T2 specifies 1K, 2K, 4K, 8K, 16K and 32K. +
+ +
+ <constant>DTV_GUARD_INTERVAL</constant> + + Possible values are: + +typedef enum fe_guard_interval { + GUARD_INTERVAL_1_32, + GUARD_INTERVAL_1_16, + GUARD_INTERVAL_1_8, + GUARD_INTERVAL_1_4, + GUARD_INTERVAL_AUTO, + GUARD_INTERVAL_1_128, + GUARD_INTERVAL_19_128, + GUARD_INTERVAL_19_256, +} fe_guard_interval_t; + + + Notes: + 1) If DTV_GUARD_INTERVAL is set the GUARD_INTERVAL_AUTO the hardware will + try to find the correct guard interval (if capable) and will use TMCC to fill + in the missing parameters. + 2) Intervals 1/128, 19/128 and 19/256 are used only for DVB-T2 at present +
+
+
ISDB-T frontend This section describes shortly what are the possible parameters in the Linux @@ -32,73 +356,6 @@ Parameters used by ISDB-T and ISDB-Tsb. -
- Parameters that are common with DVB-T and ATSC - -
- <constant>DTV_FREQUENCY</constant> - - Central frequency of the channel. - - For ISDB-T the channels are usually transmitted with an offset of 143kHz. E.g. a - valid frequncy could be 474143 kHz. The stepping is bound to the bandwidth of - the channel which is 6MHz. - - As in ISDB-Tsb the channel consists of only one or three segments the - frequency step is 429kHz, 3*429 respectively. As for ISDB-T the - central frequency of the channel is expected. -
- -
- <constant>DTV_BANDWIDTH_HZ</constant> (optional) - - Possible values: - - For ISDB-T it should be always 6000000Hz (6MHz) - For ISDB-Tsb it can vary depending on the number of connected segments - - Note: Hardware specific values might be given here, but standard - applications should not bother to set a value to this field as - standard demods are ignoring it anyway. - - Bandwidth in ISDB-T is fixed (6MHz) or can be easily derived from - other parameters (DTV_ISDBT_SB_SEGMENT_IDX, - DTV_ISDBT_SB_SEGMENT_COUNT). -
- -
- <constant>DTV_DELIVERY_SYSTEM</constant> - - Possible values: SYS_ISDBT -
- -
- <constant>DTV_TRANSMISSION_MODE</constant> - - ISDB-T supports three carrier/symbol-size: 8K, 4K, 2K. It is called - 'mode' in the standard: Mode 1 is 2K, mode 2 is 4K, mode 3 is 8K - - Possible values: TRANSMISSION_MODE_2K, TRANSMISSION_MODE_8K, - TRANSMISSION_MODE_AUTO, TRANSMISSION_MODE_4K - - If DTV_TRANSMISSION_MODE is set the TRANSMISSION_MODE_AUTO the - hardware will try to find the correct FFT-size (if capable) and will - use TMCC to fill in the missing parameters. - - TRANSMISSION_MODE_4K is added at the same time as the other new parameters. -
- -
- <constant>DTV_GUARD_INTERVAL</constant> - - Possible values: GUARD_INTERVAL_1_32, GUARD_INTERVAL_1_16, GUARD_INTERVAL_1_8, - GUARD_INTERVAL_1_4, GUARD_INTERVAL_AUTO - - If DTV_GUARD_INTERVAL is set the GUARD_INTERVAL_AUTO the hardware will - try to find the correct guard interval (if capable) and will use TMCC to fill - in the missing parameters. -
-
ISDB-T only parameters @@ -314,5 +571,20 @@
+
+ DVB-T2 parameters + + This section covers parameters that apply only to the DVB-T2 delivery method. DVB-T2 + support is currently in the early stages development so expect this section to grow + and become more detailed with time. + +
+ <constant>DTV_DVBT2_PLP_ID</constant> + + DVB-T2 supports Physical Layer Pipes (PLP) to allow transmission of + many data types via a single multiplex. The API will soon support this + at which point this section will be expanded. +
+
diff --git a/Documentation/DocBook/dvb/frontend.h.xml b/Documentation/DocBook/dvb/frontend.h.xml index d08e0d401418..d792f789ad3b 100644 --- a/Documentation/DocBook/dvb/frontend.h.xml +++ b/Documentation/DocBook/dvb/frontend.h.xml @@ -176,14 +176,20 @@ typedef enum fe_transmit_mode { TRANSMISSION_MODE_2K, TRANSMISSION_MODE_8K, TRANSMISSION_MODE_AUTO, - TRANSMISSION_MODE_4K + TRANSMISSION_MODE_4K, + TRANSMISSION_MODE_1K, + TRANSMISSION_MODE_16K, + TRANSMISSION_MODE_32K, } fe_transmit_mode_t; typedef enum fe_bandwidth { BANDWIDTH_8_MHZ, BANDWIDTH_7_MHZ, BANDWIDTH_6_MHZ, - BANDWIDTH_AUTO + BANDWIDTH_AUTO, + BANDWIDTH_5_MHZ, + BANDWIDTH_10_MHZ, + BANDWIDTH_1_712_MHZ, } fe_bandwidth_t; @@ -192,7 +198,10 @@ typedef enum fe_guard_interval { GUARD_INTERVAL_1_16, GUARD_INTERVAL_1_8, GUARD_INTERVAL_1_4, - GUARD_INTERVAL_AUTO + GUARD_INTERVAL_AUTO, + GUARD_INTERVAL_1_128, + GUARD_INTERVAL_19_128, + GUARD_INTERVAL_19_256, } fe_guard_interval_t; @@ -306,7 +315,9 @@ struct dvb_frontend_event { #define DTV_ISDBS_TS_ID 42 -#define DTV_MAX_COMMAND DTV_ISDBS_TS_ID +#define DTV_DVBT2_PLP_ID 43 + +#define DTV_MAX_COMMAND DTV_DVBT2_PLP_ID typedef enum fe_pilot { PILOT_ON, @@ -338,6 +349,7 @@ typedef enum fe_delivery_system { SYS_DMBTH, SYS_CMMB, SYS_DAB, + SYS_DVBT2, } fe_delivery_system_t; struct dtv_cmds_h { diff --git a/Documentation/DocBook/genericirq.tmpl b/Documentation/DocBook/genericirq.tmpl index fb10fd08c05c..b3422341d65c 100644 --- a/Documentation/DocBook/genericirq.tmpl +++ b/Documentation/DocBook/genericirq.tmpl @@ -191,8 +191,8 @@ Whenever an interrupt triggers, the lowlevel arch code calls into the generic interrupt code by calling desc->handle_irq(). - This highlevel IRQ handling function only uses desc->chip primitives - referenced by the assigned chip descriptor structure. + This highlevel IRQ handling function only uses desc->irq_data.chip + primitives referenced by the assigned chip descriptor structure.
@@ -206,11 +206,11 @@ enable_irq() disable_irq_nosync() (SMP only) synchronize_irq() (SMP only) - set_irq_type() - set_irq_wake() - set_irq_data() - set_irq_chip() - set_irq_chip_data() + irq_set_irq_type() + irq_set_irq_wake() + irq_set_handler_data() + irq_set_chip() + irq_set_chip_data() See the autogenerated function documentation for details. @@ -225,6 +225,8 @@ handle_fasteoi_irq handle_simple_irq handle_percpu_irq + handle_edge_eoi_irq + handle_bad_irq The interrupt flow handlers (either predefined or architecture specific) are assigned to specific interrupts by the architecture @@ -241,13 +243,13 @@ default_enable(struct irq_data *data) { - desc->chip->irq_unmask(data); + desc->irq_data.chip->irq_unmask(data); } default_disable(struct irq_data *data) { if (!delay_disable(data)) - desc->chip->irq_mask(data); + desc->irq_data.chip->irq_mask(data); } default_ack(struct irq_data *data) @@ -284,9 +286,9 @@ noop(struct irq_data *data)) The following control flow is implemented (simplified excerpt): -desc->chip->irq_mask(); -handle_IRQ_event(desc->action); -desc->chip->irq_unmask(); +desc->irq_data.chip->irq_mask_ack(); +handle_irq_event(desc->action); +desc->irq_data.chip->irq_unmask(); @@ -300,8 +302,8 @@ desc->chip->irq_unmask(); The following control flow is implemented (simplified excerpt): -handle_IRQ_event(desc->action); -desc->chip->irq_eoi(); +handle_irq_event(desc->action); +desc->irq_data.chip->irq_eoi(); @@ -315,17 +317,17 @@ desc->chip->irq_eoi(); The following control flow is implemented (simplified excerpt): if (desc->status & running) { - desc->chip->irq_mask(); + desc->irq_data.chip->irq_mask_ack(); desc->status |= pending | masked; return; } -desc->chip->irq_ack(); +desc->irq_data.chip->irq_ack(); desc->status |= running; do { if (desc->status & masked) - desc->chip->irq_unmask(); + desc->irq_data.chip->irq_unmask(); desc->status &= ~pending; - handle_IRQ_event(desc->action); + handle_irq_event(desc->action); } while (status & pending); desc->status &= ~running; @@ -344,7 +346,7 @@ desc->status &= ~running; The following control flow is implemented (simplified excerpt): -handle_IRQ_event(desc->action); +handle_irq_event(desc->action); @@ -362,12 +364,29 @@ handle_IRQ_event(desc->action); The following control flow is implemented (simplified excerpt): -handle_IRQ_event(desc->action); -if (desc->chip->irq_eoi) - desc->chip->irq_eoi(); +if (desc->irq_data.chip->irq_ack) + desc->irq_data.chip->irq_ack(); +handle_irq_event(desc->action); +if (desc->irq_data.chip->irq_eoi) + desc->irq_data.chip->irq_eoi(); + + EOI Edge IRQ flow handler + + handle_edge_eoi_irq provides an abnomination of the edge + handler which is solely used to tame a badly wreckaged + irq controller on powerpc/cell. + + + + Bad IRQ flow handler + + handle_bad_irq is used for spurious interrupts which + have no real handler assigned.. + + Quirks and optimizations @@ -410,6 +429,7 @@ if (desc->chip->irq_eoi) irq_mask_ack() - Optional, recommended for performance irq_mask() irq_unmask() + irq_eoi() - Optional, required for eoi flow handlers irq_retrigger() - Optional irq_set_type() - Optional irq_set_wake() - Optional @@ -424,32 +444,24 @@ if (desc->chip->irq_eoi) __do_IRQ entry point - The original implementation __do_IRQ() is an alternative entry - point for all types of interrupts. + The original implementation __do_IRQ() was an alternative entry + point for all types of interrupts. It not longer exists. This handler turned out to be not suitable for all interrupt hardware and was therefore reimplemented with split - functionality for egde/level/simple/percpu interrupts. This is not + functionality for edge/level/simple/percpu interrupts. This is not only a functional optimization. It also shortens code paths for interrupts. - - To make use of the split implementation, replace the call to - __do_IRQ by a call to desc->handle_irq() and associate - the appropriate handler function to desc->handle_irq(). - In most cases the generic handler implementations should - be sufficient. - Locking on SMP The locking of chip registers is up to the architecture that - defines the chip primitives. There is a chip->lock field that can be used - for serialization, but the generic layer does not touch it. The per-irq - structure is protected via desc->lock, by the generic layer. + defines the chip primitives. The per-irq structure is + protected via desc->lock, by the generic layer. diff --git a/Documentation/DocBook/media-entities.tmpl b/Documentation/DocBook/media-entities.tmpl index 5d259c632cdf..e5fe09430fd9 100644 --- a/Documentation/DocBook/media-entities.tmpl +++ b/Documentation/DocBook/media-entities.tmpl @@ -270,6 +270,7 @@ + @@ -292,8 +293,11 @@ + + + @@ -370,9 +374,9 @@ - - - + + + diff --git a/Documentation/DocBook/mtdnand.tmpl b/Documentation/DocBook/mtdnand.tmpl index 6f242d5dee9a..17910e2052ad 100644 --- a/Documentation/DocBook/mtdnand.tmpl +++ b/Documentation/DocBook/mtdnand.tmpl @@ -189,8 +189,7 @@ static void __iomem *baseaddr; Partition defines If you want to divide your device into partitions, then - enable the configuration switch CONFIG_MTD_PARTITIONS and define - a partitioning scheme suitable to your board. + define a partitioning scheme suitable to your board. #define NUM_PARTITIONS 2 diff --git a/Documentation/DocBook/v4l/media-controller.xml b/Documentation/DocBook/v4l/media-controller.xml index 2dc25e1d4089..873ac3a621f0 100644 --- a/Documentation/DocBook/v4l/media-controller.xml +++ b/Documentation/DocBook/v4l/media-controller.xml @@ -78,9 +78,9 @@ Function Reference - &sub-media-open; - &sub-media-close; - &sub-media-ioctl; + &sub-media-func-open; + &sub-media-func-close; + &sub-media-func-ioctl; &sub-media-ioc-device-info; &sub-media-ioc-enum-entities; diff --git a/Documentation/DocBook/v4l/media-ioc-setup-link.xml b/Documentation/DocBook/v4l/media-ioc-setup-link.xml index 2331e76ded17..cec97af4dab4 100644 --- a/Documentation/DocBook/v4l/media-ioc-setup-link.xml +++ b/Documentation/DocBook/v4l/media-ioc-setup-link.xml @@ -34,7 +34,7 @@ request - MEDIA_IOC_ENUM_LINKS + MEDIA_IOC_SETUP_LINK diff --git a/Documentation/DocBook/v4l/pixfmt-m420.xml b/Documentation/DocBook/v4l/pixfmt-m420.xml new file mode 100644 index 000000000000..ce4bc019e5c0 --- /dev/null +++ b/Documentation/DocBook/v4l/pixfmt-m420.xml @@ -0,0 +1,147 @@ + + + V4L2_PIX_FMT_M420 ('M420') + &manvol; + + + V4L2_PIX_FMT_M420 + Format with ½ horizontal and vertical chroma + resolution, also known as YUV 4:2:0. Hybrid plane line-interleaved + layout. + + + Description + + M420 is a YUV format with ½ horizontal and vertical chroma + subsampling (YUV 4:2:0). Pixels are organized as interleaved luma and + chroma planes. Two lines of luma data are followed by one line of chroma + data. + The luma plane has one byte per pixel. The chroma plane contains + interleaved CbCr pixels subsampled by ½ in the horizontal and + vertical directions. Each CbCr pair belongs to four pixels. For example, +Cb0/Cr0 belongs to +Y'00, Y'01, +Y'10, Y'11. + + All line lengths are identical: if the Y lines include pad bytes + so do the CbCr lines. + + + <constant>V4L2_PIX_FMT_M420</constant> 4 × 4 +pixel image + + + Byte Order. + Each cell is one byte. + + + + + + start + 0: + Y'00 + Y'01 + Y'02 + Y'03 + + + start + 4: + Y'10 + Y'11 + Y'12 + Y'13 + + + start + 8: + Cb00 + Cr00 + Cb01 + Cr01 + + + start + 16: + Y'20 + Y'21 + Y'22 + Y'23 + + + start + 20: + Y'30 + Y'31 + Y'32 + Y'33 + + + start + 24: + Cb10 + Cr10 + Cb11 + Cr11 + + + + + + + + + Color Sample Location. + + + + + + + 01 + 23 + + + 0 + YY + YY + + + + C + C + + + 1 + YY + YY + + + + + + 2 + YY + YY + + + + C + C + + + 3 + YY + YY + + + + + + + + + + + diff --git a/Documentation/DocBook/v4l/pixfmt-y10b.xml b/Documentation/DocBook/v4l/pixfmt-y10b.xml new file mode 100644 index 000000000000..adb0ad808c93 --- /dev/null +++ b/Documentation/DocBook/v4l/pixfmt-y10b.xml @@ -0,0 +1,43 @@ + + + V4L2_PIX_FMT_Y10BPACK ('Y10B') + &manvol; + + + V4L2_PIX_FMT_Y10BPACK + Grey-scale image as a bit-packed array + + + Description + + This is a packed grey-scale image format with a depth of 10 bits per + pixel. Pixels are stored in a bit-packed array of 10bit bits per pixel, + with no padding between them and with the most significant bits coming + first from the left. + + + <constant>V4L2_PIX_FMT_Y10BPACK</constant> 4 pixel data stream taking 5 bytes + + + Bit-packed representation + pixels cross the byte boundary and have a ratio of 5 bytes for each 4 + pixels. + + + + + + Y'00[9:2] + Y'00[1:0]Y'01[9:4] + Y'01[3:0]Y'02[9:6] + Y'02[5:0]Y'03[9:8] + Y'03[7:0] + + + + + + + + + diff --git a/Documentation/DocBook/v4l/pixfmt-y12.xml b/Documentation/DocBook/v4l/pixfmt-y12.xml new file mode 100644 index 000000000000..ff417b858cc9 --- /dev/null +++ b/Documentation/DocBook/v4l/pixfmt-y12.xml @@ -0,0 +1,79 @@ + + + V4L2_PIX_FMT_Y12 ('Y12 ') + &manvol; + + + V4L2_PIX_FMT_Y12 + Grey-scale image + + + Description + + This is a grey-scale image with a depth of 12 bits per pixel. Pixels +are stored in 16-bit words with unused high bits padded with 0. The least +significant byte is stored at lower memory addresses (little-endian). + + + <constant>V4L2_PIX_FMT_Y12</constant> 4 × 4 +pixel image + + + Byte Order. + Each cell is one byte. + + + + + + start + 0: + Y'00low + Y'00high + Y'01low + Y'01high + Y'02low + Y'02high + Y'03low + Y'03high + + + start + 8: + Y'10low + Y'10high + Y'11low + Y'11high + Y'12low + Y'12high + Y'13low + Y'13high + + + start + 16: + Y'20low + Y'20high + Y'21low + Y'21high + Y'22low + Y'22high + Y'23low + Y'23high + + + start + 24: + Y'30low + Y'30high + Y'31low + Y'31high + Y'32low + Y'32high + Y'33low + Y'33high + + + + + + + + + diff --git a/Documentation/DocBook/v4l/pixfmt.xml b/Documentation/DocBook/v4l/pixfmt.xml index c6fdcbbd1b41..deb660207f94 100644 --- a/Documentation/DocBook/v4l/pixfmt.xml +++ b/Documentation/DocBook/v4l/pixfmt.xml @@ -673,6 +673,7 @@ access the palette, this must be done with ioctls of the Linux framebuffer API.< &sub-srggb8; &sub-sbggr16; &sub-srggb10; + &sub-srggb12;
@@ -696,6 +697,8 @@ information. &sub-packed-yuv; &sub-grey; &sub-y10; + &sub-y12; + &sub-y10b; &sub-y16; &sub-yuyv; &sub-uyvy; @@ -711,6 +714,7 @@ information. &sub-nv12m; &sub-nv12mt; &sub-nv16; + &sub-m420;
diff --git a/Documentation/DocBook/v4l/subdev-formats.xml b/Documentation/DocBook/v4l/subdev-formats.xml index 7041127d6dfc..8d3409d2c632 100644 --- a/Documentation/DocBook/v4l/subdev-formats.xml +++ b/Documentation/DocBook/v4l/subdev-formats.xml @@ -456,6 +456,23 @@ b1 b0 + + V4L2_MBUS_FMT_SGBRG8_1X8 + 0x3013 + + - + - + - + - + g7 + g6 + g5 + g4 + g3 + g2 + g1 + g0 + V4L2_MBUS_FMT_SGRBG8_1X8 0x3002 @@ -473,6 +490,23 @@ g1 g0 + + V4L2_MBUS_FMT_SRGGB8_1X8 + 0x3014 + + - + - + - + - + r7 + r6 + r5 + r4 + r3 + r2 + r1 + r0 + V4L2_MBUS_FMT_SBGGR10_DPCM8_1X8 0x300b @@ -2159,6 +2193,31 @@ u1 u0 + + V4L2_MBUS_FMT_Y12_1X12 + 0x2013 + + - + - + - + - + - + - + - + - + y11 + y10 + y9 + y8 + y7 + y6 + y5 + y4 + y3 + y2 + y1 + y0 + V4L2_MBUS_FMT_UYVY8_1X16 0x200f @@ -2463,5 +2522,51 @@
+ +
+ JPEG Compressed Formats + + Those data formats consist of an ordered sequence of 8-bit bytes + obtained from JPEG compression process. Additionally to the + _JPEG prefix the format code is made of + the following information. + + The number of bus samples per entropy encoded byte. + The bus width. + + + + For instance, for a JPEG baseline process and an 8-bit bus width + the format will be named V4L2_MBUS_FMT_JPEG_1X8. + + + The following table lists existing JPEG compressed formats. + + + JPEG Formats + + + + + + + Identifier + Code + Remarks + + + + + V4L2_MBUS_FMT_JPEG_1X8 + 0x4001 + Besides of its usage for the parallel bus this format is + recommended for transmission of JPEG data over MIPI CSI bus + using the User Defined 8-bit Data types. + + + + +
+
diff --git a/Documentation/DocBook/v4l/videodev2.h.xml b/Documentation/DocBook/v4l/videodev2.h.xml index 2b796a2ee98a..c50536a4f596 100644 --- a/Documentation/DocBook/v4l/videodev2.h.xml +++ b/Documentation/DocBook/v4l/videodev2.h.xml @@ -311,6 +311,9 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_Y10 v4l2_fourcc('Y', '1', '0', ' ') /* 10 Greyscale */ #define V4L2_PIX_FMT_Y16 v4l2_fourcc('Y', '1', '6', ' ') /* 16 Greyscale */ +/* Grey bit-packed formats */ +#define V4L2_PIX_FMT_Y10BPACK v4l2_fourcc('Y', '1', '0', 'B') /* 10 Greyscale bit-packed */ + /* Palette formats */ #define V4L2_PIX_FMT_PAL8 v4l2_fourcc('P', 'A', 'L', '8') /* 8 8-bit palette */ @@ -333,6 +336,7 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_YUV420 v4l2_fourcc('Y', 'U', '1', '2') /* 12 YUV 4:2:0 */ #define V4L2_PIX_FMT_HI240 v4l2_fourcc('H', 'I', '2', '4') /* 8 8-bit color */ #define V4L2_PIX_FMT_HM12 v4l2_fourcc('H', 'M', '1', '2') /* 8 YUV 4:2:0 16x16 macroblocks */ +#define V4L2_PIX_FMT_M420 v4l2_fourcc('M', '4', '2', '0') /* 12 YUV 4:2:0 2 lines y, 1 line uv interleaved */ /* two planes -- one Y, one Cr + Cb interleaved */ #define V4L2_PIX_FMT_NV12 v4l2_fourcc('N', 'V', '1', '2') /* 12 Y/CbCr 4:2:0 */ diff --git a/Documentation/HOWTO b/Documentation/HOWTO index 365bda9a0d94..81bc1a9ab9d8 100644 --- a/Documentation/HOWTO +++ b/Documentation/HOWTO @@ -209,7 +209,7 @@ tools. One such tool that is particularly recommended is the Linux Cross-Reference project, which is able to present source code in a self-referential, indexed webpage format. An excellent up-to-date repository of the kernel code may be found at: - http://users.sosdg.org/~qiyong/lxr/ + http://lxr.linux.no/+trees The development process diff --git a/Documentation/IRQ-affinity.txt b/Documentation/IRQ-affinity.txt index b4a615b78403..7890fae18529 100644 --- a/Documentation/IRQ-affinity.txt +++ b/Documentation/IRQ-affinity.txt @@ -4,10 +4,11 @@ ChangeLog: SMP IRQ affinity -/proc/irq/IRQ#/smp_affinity specifies which target CPUs are permitted -for a given IRQ source. It's a bitmask of allowed CPUs. It's not allowed -to turn off all CPUs, and if an IRQ controller does not support IRQ -affinity then the value will not change from the default 0xffffffff. +/proc/irq/IRQ#/smp_affinity and /proc/irq/IRQ#/smp_affinity_list specify +which target CPUs are permitted for a given IRQ source. It's a bitmask +(smp_affinity) or cpu list (smp_affinity_list) of allowed CPUs. It's not +allowed to turn off all CPUs, and if an IRQ controller does not support +IRQ affinity then the value will not change from the default of all cpus. /proc/irq/default_smp_affinity specifies default affinity mask that applies to all non-active IRQs. Once IRQ is allocated/activated its affinity bitmask @@ -54,3 +55,11 @@ round-trip min/avg/max = 0.1/0.5/585.4 ms This time around IRQ44 was delivered only to the last four processors. i.e counters for the CPU0-3 did not change. +Here is an example of limiting that same irq (44) to cpus 1024 to 1031: + +[root@moon 44]# echo 1024-1031 > smp_affinity +[root@moon 44]# cat smp_affinity +1024-1031 + +Note that to do this with a bitmask would require 32 bitmasks of zero +to follow the pertinent one. diff --git a/Documentation/RCU/00-INDEX b/Documentation/RCU/00-INDEX index 71b6f500ddb9..1d7a885761f5 100644 --- a/Documentation/RCU/00-INDEX +++ b/Documentation/RCU/00-INDEX @@ -21,7 +21,7 @@ rcu.txt RTFP.txt - List of RCU papers (bibliography) going back to 1980. stallwarn.txt - - RCU CPU stall warnings (CONFIG_RCU_CPU_STALL_DETECTOR) + - RCU CPU stall warnings (module parameter rcu_cpu_stall_suppress) torture.txt - RCU Torture Test Operation (CONFIG_RCU_TORTURE_TEST) trace.txt diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt index 862c08ef1fde..4e959208f736 100644 --- a/Documentation/RCU/stallwarn.txt +++ b/Documentation/RCU/stallwarn.txt @@ -1,22 +1,25 @@ Using RCU's CPU Stall Detector -The CONFIG_RCU_CPU_STALL_DETECTOR kernel config parameter enables -RCU's CPU stall detector, which detects conditions that unduly delay -RCU grace periods. The stall detector's idea of what constitutes -"unduly delayed" is controlled by a set of C preprocessor macros: +The rcu_cpu_stall_suppress module parameter enables RCU's CPU stall +detector, which detects conditions that unduly delay RCU grace periods. +This module parameter enables CPU stall detection by default, but +may be overridden via boot-time parameter or at runtime via sysfs. +The stall detector's idea of what constitutes "unduly delayed" is +controlled by a set of kernel configuration variables and cpp macros: -RCU_SECONDS_TILL_STALL_CHECK +CONFIG_RCU_CPU_STALL_TIMEOUT - This macro defines the period of time that RCU will wait from - the beginning of a grace period until it issues an RCU CPU - stall warning. This time period is normally ten seconds. + This kernel configuration parameter defines the period of time + that RCU will wait from the beginning of a grace period until it + issues an RCU CPU stall warning. This time period is normally + ten seconds. RCU_SECONDS_TILL_STALL_RECHECK This macro defines the period of time that RCU will wait after issuing a stall warning until it issues another stall warning - for the same stall. This time period is normally set to thirty - seconds. + for the same stall. This time period is normally set to three + times the check interval plus thirty seconds. RCU_STALL_RAT_DELAY diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt index 6a8c73f55b80..8173cec473aa 100644 --- a/Documentation/RCU/trace.txt +++ b/Documentation/RCU/trace.txt @@ -10,34 +10,46 @@ for rcutree and next for rcutiny. CONFIG_TREE_RCU and CONFIG_TREE_PREEMPT_RCU debugfs Files and Formats -These implementations of RCU provides five debugfs files under the -top-level directory RCU: rcu/rcudata (which displays fields in struct -rcu_data), rcu/rcudata.csv (which is a .csv spreadsheet version of -rcu/rcudata), rcu/rcugp (which displays grace-period counters), -rcu/rcuhier (which displays the struct rcu_node hierarchy), and -rcu/rcu_pending (which displays counts of the reasons that the -rcu_pending() function decided that there was core RCU work to do). +These implementations of RCU provides several debugfs files under the +top-level directory "rcu": + +rcu/rcudata: + Displays fields in struct rcu_data. +rcu/rcudata.csv: + Comma-separated values spreadsheet version of rcudata. +rcu/rcugp: + Displays grace-period counters. +rcu/rcuhier: + Displays the struct rcu_node hierarchy. +rcu/rcu_pending: + Displays counts of the reasons rcu_pending() decided that RCU had + work to do. +rcu/rcutorture: + Displays rcutorture test progress. +rcu/rcuboost: + Displays RCU boosting statistics. Only present if + CONFIG_RCU_BOOST=y. The output of "cat rcu/rcudata" looks as follows: rcu_sched: - 0 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=10951/1 dn=0 df=1101 of=0 ri=36 ql=0 b=10 - 1 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=16117/1 dn=0 df=1015 of=0 ri=0 ql=0 b=10 - 2 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=1445/1 dn=0 df=1839 of=0 ri=0 ql=0 b=10 - 3 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=6681/1 dn=0 df=1545 of=0 ri=0 ql=0 b=10 - 4 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=1003/1 dn=0 df=1992 of=0 ri=0 ql=0 b=10 - 5 c=17829 g=17830 pq=1 pqc=17829 qp=1 dt=3887/1 dn=0 df=3331 of=0 ri=4 ql=2 b=10 - 6 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=859/1 dn=0 df=3224 of=0 ri=0 ql=0 b=10 - 7 c=17829 g=17830 pq=0 pqc=17829 qp=1 dt=3761/1 dn=0 df=1818 of=0 ri=0 ql=2 b=10 + 0 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=545/1/0 df=50 of=0 ri=0 ql=163 qs=NRW. kt=0/W/0 ktl=ebc3 b=10 ci=153737 co=0 ca=0 + 1 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=967/1/0 df=58 of=0 ri=0 ql=634 qs=NRW. kt=0/W/1 ktl=58c b=10 ci=191037 co=0 ca=0 + 2 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=1081/1/0 df=175 of=0 ri=0 ql=74 qs=N.W. kt=0/W/2 ktl=da94 b=10 ci=75991 co=0 ca=0 + 3 c=20942 g=20943 pq=1 pqc=20942 qp=1 dt=1846/0/0 df=404 of=0 ri=0 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=72261 co=0 ca=0 + 4 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=369/1/0 df=83 of=0 ri=0 ql=48 qs=N.W. kt=0/W/4 ktl=e0e7 b=10 ci=128365 co=0 ca=0 + 5 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=381/1/0 df=64 of=0 ri=0 ql=169 qs=NRW. kt=0/W/5 ktl=fb2f b=10 ci=164360 co=0 ca=0 + 6 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=1037/1/0 df=183 of=0 ri=0 ql=62 qs=N.W. kt=0/W/6 ktl=d2ad b=10 ci=65663 co=0 ca=0 + 7 c=20897 g=20897 pq=1 pqc=20896 qp=0 dt=1572/0/0 df=382 of=0 ri=0 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=75006 co=0 ca=0 rcu_bh: - 0 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=10951/1 dn=0 df=0 of=0 ri=0 ql=0 b=10 - 1 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=16117/1 dn=0 df=13 of=0 ri=0 ql=0 b=10 - 2 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=1445/1 dn=0 df=15 of=0 ri=0 ql=0 b=10 - 3 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=6681/1 dn=0 df=9 of=0 ri=0 ql=0 b=10 - 4 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=1003/1 dn=0 df=15 of=0 ri=0 ql=0 b=10 - 5 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=3887/1 dn=0 df=15 of=0 ri=0 ql=0 b=10 - 6 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=859/1 dn=0 df=15 of=0 ri=0 ql=0 b=10 - 7 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=3761/1 dn=0 df=15 of=0 ri=0 ql=0 b=10 + 0 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=545/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/0 ktl=ebc3 b=10 ci=0 co=0 ca=0 + 1 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=967/1/0 df=3 of=0 ri=1 ql=0 qs=.... kt=0/W/1 ktl=58c b=10 ci=151 co=0 ca=0 + 2 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=1081/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/2 ktl=da94 b=10 ci=0 co=0 ca=0 + 3 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=1846/0/0 df=8 of=0 ri=1 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=0 co=0 ca=0 + 4 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=369/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/4 ktl=e0e7 b=10 ci=0 co=0 ca=0 + 5 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=381/1/0 df=4 of=0 ri=1 ql=0 qs=.... kt=0/W/5 ktl=fb2f b=10 ci=0 co=0 ca=0 + 6 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=1037/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/6 ktl=d2ad b=10 ci=0 co=0 ca=0 + 7 c=1474 g=1474 pq=1 pqc=1473 qp=0 dt=1572/0/0 df=8 of=0 ri=1 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=0 co=0 ca=0 The first section lists the rcu_data structures for rcu_sched, the second for rcu_bh. Note that CONFIG_TREE_PREEMPT_RCU kernels will have an @@ -52,17 +64,18 @@ o The number at the beginning of each line is the CPU number. substantially larger than the number of actual CPUs. o "c" is the count of grace periods that this CPU believes have - completed. CPUs in dynticks idle mode may lag quite a ways - behind, for example, CPU 4 under "rcu_sched" above, which has - slept through the past 25 RCU grace periods. It is not unusual - to see CPUs lagging by thousands of grace periods. + completed. Offlined CPUs and CPUs in dynticks idle mode may + lag quite a ways behind, for example, CPU 6 under "rcu_sched" + above, which has been offline through not quite 40,000 RCU grace + periods. It is not unusual to see CPUs lagging by thousands of + grace periods. o "g" is the count of grace periods that this CPU believes have - started. Again, CPUs in dynticks idle mode may lag behind. - If the "c" and "g" values are equal, this CPU has already - reported a quiescent state for the last RCU grace period that - it is aware of, otherwise, the CPU believes that it owes RCU a - quiescent state. + started. Again, offlined CPUs and CPUs in dynticks idle mode + may lag behind. If the "c" and "g" values are equal, this CPU + has already reported a quiescent state for the last RCU grace + period that it is aware of, otherwise, the CPU believes that it + owes RCU a quiescent state. o "pq" indicates that this CPU has passed through a quiescent state for the current grace period. It is possible for "pq" to be @@ -81,22 +94,16 @@ o "pqc" indicates which grace period the last-observed quiescent the next grace period! o "qp" indicates that RCU still expects a quiescent state from - this CPU. + this CPU. Offlined CPUs and CPUs in dyntick idle mode might + well have qp=1, which is OK: RCU is still ignoring them. o "dt" is the current value of the dyntick counter that is incremented when entering or leaving dynticks idle state, either by the - scheduler or by irq. The number after the "/" is the interrupt - nesting depth when in dyntick-idle state, or one greater than - the interrupt-nesting depth otherwise. - - This field is displayed only for CONFIG_NO_HZ kernels. - -o "dn" is the current value of the dyntick counter that is incremented - when entering or leaving dynticks idle state via NMI. If both - the "dt" and "dn" values are even, then this CPU is in dynticks - idle mode and may be ignored by RCU. If either of these two - counters is odd, then RCU must be alert to the possibility of - an RCU read-side critical section running on this CPU. + scheduler or by irq. This number is even if the CPU is in + dyntick idle mode and odd otherwise. The number after the first + "/" is the interrupt nesting depth when in dyntick-idle state, + or one greater than the interrupt-nesting depth otherwise. + The number after the second "/" is the NMI nesting depth. This field is displayed only for CONFIG_NO_HZ kernels. @@ -108,7 +115,7 @@ o "df" is the number of times that some other CPU has forced a o "of" is the number of times that some other CPU has forced a quiescent state on behalf of this CPU due to this CPU being - offline. In a perfect world, this might neve happen, but it + offline. In a perfect world, this might never happen, but it turns out that offlining and onlining a CPU can take several grace periods, and so there is likely to be an extended period of time when RCU believes that the CPU is online when it really is not. @@ -125,6 +132,62 @@ o "ql" is the number of RCU callbacks currently residing on of what state they are in (new, waiting for grace period to start, waiting for grace period to end, ready to invoke). +o "qs" gives an indication of the state of the callback queue + with four characters: + + "N" Indicates that there are callbacks queued that are not + ready to be handled by the next grace period, and thus + will be handled by the grace period following the next + one. + + "R" Indicates that there are callbacks queued that are + ready to be handled by the next grace period. + + "W" Indicates that there are callbacks queued that are + waiting on the current grace period. + + "D" Indicates that there are callbacks queued that have + already been handled by a prior grace period, and are + thus waiting to be invoked. Note that callbacks in + the process of being invoked are not counted here. + Callbacks in the process of being invoked are those + that have been removed from the rcu_data structures + queues by rcu_do_batch(), but which have not yet been + invoked. + + If there are no callbacks in a given one of the above states, + the corresponding character is replaced by ".". + +o "kt" is the per-CPU kernel-thread state. The digit preceding + the first slash is zero if there is no work pending and 1 + otherwise. The character between the first pair of slashes is + as follows: + + "S" The kernel thread is stopped, in other words, all + CPUs corresponding to this rcu_node structure are + offline. + + "R" The kernel thread is running. + + "W" The kernel thread is waiting because there is no work + for it to do. + + "O" The kernel thread is waiting because it has been + forced off of its designated CPU or because its + ->cpus_allowed mask permits it to run on other than + its designated CPU. + + "Y" The kernel thread is yielding to avoid hogging CPU. + + "?" Unknown value, indicates a bug. + + The number after the final slash is the CPU that the kthread + is actually running on. + +o "ktl" is the low-order 16 bits (in hexadecimal) of the count of + the number of times that this CPU's per-CPU kthread has gone + through its loop servicing invoke_rcu_cpu_kthread() requests. + o "b" is the batch limit for this CPU. If more than this number of RCU callbacks is ready to invoke, then the remainder will be deferred. @@ -174,14 +237,14 @@ o "gpnum" is the number of grace periods that have started. It is The output of "cat rcu/rcuhier" looks as follows, with very long lines: c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6 -1/1 .>. 0:127 ^0 -3/3 .>. 0:35 ^0 0/0 .>. 36:71 ^1 0/0 .>. 72:107 ^2 0/0 .>. 108:127 ^3 -3/3f .>. 0:5 ^0 2/3 .>. 6:11 ^1 0/0 .>. 12:17 ^2 0/0 .>. 18:23 ^3 0/0 .>. 24:29 ^4 0/0 .>. 30:35 ^5 0/0 .>. 36:41 ^0 0/0 .>. 42:47 ^1 0/0 .>. 48:53 ^2 0/0 .>. 54:59 ^3 0/0 .>. 60:65 ^4 0/0 .>. 66:71 ^5 0/0 .>. 72:77 ^0 0/0 .>. 78:83 ^1 0/0 .>. 84:89 ^2 0/0 .>. 90:95 ^3 0/0 .>. 96:101 ^4 0/0 .>. 102:107 ^5 0/0 .>. 108:113 ^0 0/0 .>. 114:119 ^1 0/0 .>. 120:125 ^2 0/0 .>. 126:127 ^3 +1/1 ..>. 0:127 ^0 +3/3 ..>. 0:35 ^0 0/0 ..>. 36:71 ^1 0/0 ..>. 72:107 ^2 0/0 ..>. 108:127 ^3 +3/3f ..>. 0:5 ^0 2/3 ..>. 6:11 ^1 0/0 ..>. 12:17 ^2 0/0 ..>. 18:23 ^3 0/0 ..>. 24:29 ^4 0/0 ..>. 30:35 ^5 0/0 ..>. 36:41 ^0 0/0 ..>. 42:47 ^1 0/0 ..>. 48:53 ^2 0/0 ..>. 54:59 ^3 0/0 ..>. 60:65 ^4 0/0 ..>. 66:71 ^5 0/0 ..>. 72:77 ^0 0/0 ..>. 78:83 ^1 0/0 ..>. 84:89 ^2 0/0 ..>. 90:95 ^3 0/0 ..>. 96:101 ^4 0/0 ..>. 102:107 ^5 0/0 ..>. 108:113 ^0 0/0 ..>. 114:119 ^1 0/0 ..>. 120:125 ^2 0/0 ..>. 126:127 ^3 rcu_bh: c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0 -0/1 .>. 0:127 ^0 -0/3 .>. 0:35 ^0 0/0 .>. 36:71 ^1 0/0 .>. 72:107 ^2 0/0 .>. 108:127 ^3 -0/3f .>. 0:5 ^0 0/3 .>. 6:11 ^1 0/0 .>. 12:17 ^2 0/0 .>. 18:23 ^3 0/0 .>. 24:29 ^4 0/0 .>. 30:35 ^5 0/0 .>. 36:41 ^0 0/0 .>. 42:47 ^1 0/0 .>. 48:53 ^2 0/0 .>. 54:59 ^3 0/0 .>. 60:65 ^4 0/0 .>. 66:71 ^5 0/0 .>. 72:77 ^0 0/0 .>. 78:83 ^1 0/0 .>. 84:89 ^2 0/0 .>. 90:95 ^3 0/0 .>. 96:101 ^4 0/0 .>. 102:107 ^5 0/0 .>. 108:113 ^0 0/0 .>. 114:119 ^1 0/0 .>. 120:125 ^2 0/0 .>. 126:127 ^3 +0/1 ..>. 0:127 ^0 +0/3 ..>. 0:35 ^0 0/0 ..>. 36:71 ^1 0/0 ..>. 72:107 ^2 0/0 ..>. 108:127 ^3 +0/3f ..>. 0:5 ^0 0/3 ..>. 6:11 ^1 0/0 ..>. 12:17 ^2 0/0 ..>. 18:23 ^3 0/0 ..>. 24:29 ^4 0/0 ..>. 30:35 ^5 0/0 ..>. 36:41 ^0 0/0 ..>. 42:47 ^1 0/0 ..>. 48:53 ^2 0/0 ..>. 54:59 ^3 0/0 ..>. 60:65 ^4 0/0 ..>. 66:71 ^5 0/0 ..>. 72:77 ^0 0/0 ..>. 78:83 ^1 0/0 ..>. 84:89 ^2 0/0 ..>. 90:95 ^3 0/0 ..>. 96:101 ^4 0/0 ..>. 102:107 ^5 0/0 ..>. 108:113 ^0 0/0 ..>. 114:119 ^1 0/0 ..>. 120:125 ^2 0/0 ..>. 126:127 ^3 This is once again split into "rcu_sched" and "rcu_bh" portions, and CONFIG_TREE_PREEMPT_RCU kernels will again have an additional @@ -240,13 +303,20 @@ o Each element of the form "1/1 0:127 ^0" represents one struct current grace period. o The characters separated by the ">" indicate the state - of the blocked-tasks lists. A "T" preceding the ">" + of the blocked-tasks lists. A "G" preceding the ">" indicates that at least one task blocked in an RCU read-side critical section blocks the current grace - period, while a "." preceding the ">" indicates otherwise. - The character following the ">" indicates similarly for - the next grace period. A "T" should appear in this - field only for rcu-preempt. + period, while a "E" preceding the ">" indicates that + at least one task blocked in an RCU read-side critical + section blocks the current expedited grace period. + A "T" character following the ">" indicates that at + least one task is blocked within an RCU read-side + critical section, regardless of whether any current + grace period (expedited or normal) is inconvenienced. + A "." character appears if the corresponding condition + does not hold, so that "..>." indicates that no tasks + are blocked. In contrast, "GE>T" indicates maximal + inconvenience from blocked tasks. o The numbers separated by the ":" are the range of CPUs served by this struct rcu_node. This can be helpful @@ -328,6 +398,113 @@ o "nn" is the number of times that this CPU needed nothing. Alert is due to short-circuit evaluation in rcu_pending(). +The output of "cat rcu/rcutorture" looks as follows: + +rcutorture test sequence: 0 (test in progress) +rcutorture update version number: 615 + +The first line shows the number of rcutorture tests that have completed +since boot. If a test is currently running, the "(test in progress)" +string will appear as shown above. The second line shows the number of +update cycles that the current test has started, or zero if there is +no test in progress. + + +The output of "cat rcu/rcuboost" looks as follows: + +0:5 tasks=.... kt=W ntb=0 neb=0 nnb=0 j=2f95 bt=300f + balk: nt=0 egt=989 bt=0 nb=0 ny=0 nos=16 +6:7 tasks=.... kt=W ntb=0 neb=0 nnb=0 j=2f95 bt=300f + balk: nt=0 egt=225 bt=0 nb=0 ny=0 nos=6 + +This information is output only for rcu_preempt. Each two-line entry +corresponds to a leaf rcu_node strcuture. The fields are as follows: + +o "n:m" is the CPU-number range for the corresponding two-line + entry. In the sample output above, the first entry covers + CPUs zero through five and the second entry covers CPUs 6 + and 7. + +o "tasks=TNEB" gives the state of the various segments of the + rnp->blocked_tasks list: + + "T" This indicates that there are some tasks that blocked + while running on one of the corresponding CPUs while + in an RCU read-side critical section. + + "N" This indicates that some of the blocked tasks are preventing + the current normal (non-expedited) grace period from + completing. + + "E" This indicates that some of the blocked tasks are preventing + the current expedited grace period from completing. + + "B" This indicates that some of the blocked tasks are in + need of RCU priority boosting. + + Each character is replaced with "." if the corresponding + condition does not hold. + +o "kt" is the state of the RCU priority-boosting kernel + thread associated with the corresponding rcu_node structure. + The state can be one of the following: + + "S" The kernel thread is stopped, in other words, all + CPUs corresponding to this rcu_node structure are + offline. + + "R" The kernel thread is running. + + "W" The kernel thread is waiting because there is no work + for it to do. + + "Y" The kernel thread is yielding to avoid hogging CPU. + + "?" Unknown value, indicates a bug. + +o "ntb" is the number of tasks boosted. + +o "neb" is the number of tasks boosted in order to complete an + expedited grace period. + +o "nnb" is the number of tasks boosted in order to complete a + normal (non-expedited) grace period. When boosting a task + that was blocking both an expedited and a normal grace period, + it is counted against the expedited total above. + +o "j" is the low-order 16 bits of the jiffies counter in + hexadecimal. + +o "bt" is the low-order 16 bits of the value that the jiffies + counter will have when we next start boosting, assuming that + the current grace period does not end beforehand. This is + also in hexadecimal. + +o "balk: nt" counts the number of times we didn't boost (in + other words, we balked) even though it was time to boost because + there were no blocked tasks to boost. This situation occurs + when there is one blocked task on one rcu_node structure and + none on some other rcu_node structure. + +o "egt" counts the number of times we balked because although + there were blocked tasks, none of them were blocking the + current grace period, whether expedited or otherwise. + +o "bt" counts the number of times we balked because boosting + had already been initiated for the current grace period. + +o "nb" counts the number of times we balked because there + was at least one task blocking the current non-expedited grace + period that never had blocked. If it is already running, it + just won't help to boost its priority! + +o "ny" counts the number of times we balked because it was + not yet time to start boosting. + +o "nos" counts the number of times we balked for other + reasons, e.g., the grace period ended first. + + CONFIG_TINY_RCU and CONFIG_TINY_PREEMPT_RCU debugfs Files and Formats These implementations of RCU provides a single debugfs file under the @@ -394,9 +571,9 @@ o "neb" is the number of expedited grace periods that have had o "nnb" is the number of normal grace periods that have had to resort to RCU priority boosting since boot. -o "j" is the low-order 12 bits of the jiffies counter in hexadecimal. +o "j" is the low-order 16 bits of the jiffies counter in hexadecimal. -o "bt" is the low-order 12 bits of the value that the jiffies counter +o "bt" is the low-order 16 bits of the value that the jiffies counter will have at the next time that boosting is scheduled to begin. o In the line beginning with "normal balk", the fields are as follows: diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches index e439cd0d3375..569f3532e138 100644 --- a/Documentation/SubmittingPatches +++ b/Documentation/SubmittingPatches @@ -714,10 +714,11 @@ Jeff Garzik, "Linux kernel patch submission format". Greg Kroah-Hartman, "How to piss off a kernel subsystem maintainer". - - - - + + + + + NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people! diff --git a/Documentation/accounting/cgroupstats.txt b/Documentation/accounting/cgroupstats.txt index eda40fd39cad..d16a9849e60e 100644 --- a/Documentation/accounting/cgroupstats.txt +++ b/Documentation/accounting/cgroupstats.txt @@ -21,7 +21,7 @@ information will not be available. To extract cgroup statistics a utility very similar to getdelays.c has been developed, the sample output of the utility is shown below -~/balbir/cgroupstats # ./getdelays -C "/cgroup/a" +~/balbir/cgroupstats # ./getdelays -C "/sys/fs/cgroup/a" sleeping 1, blocked 0, running 1, stopped 0, uninterruptible 0 -~/balbir/cgroupstats # ./getdelays -C "/cgroup" +~/balbir/cgroupstats # ./getdelays -C "/sys/fs/cgroup" sleeping 155, blocked 0, running 1, stopped 0, uninterruptible 2 diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c index e9c77788a39d..f6318f6d7baf 100644 --- a/Documentation/accounting/getdelays.c +++ b/Documentation/accounting/getdelays.c @@ -177,6 +177,8 @@ static int get_family_id(int sd) rc = send_cmd(sd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY, CTRL_ATTR_FAMILY_NAME, (void *)name, strlen(TASKSTATS_GENL_NAME)+1); + if (rc < 0) + return 0; /* sendto() failure? */ rep_len = recv(sd, &ans, sizeof(ans), 0); if (ans.n.nlmsg_type == NLMSG_ERROR || @@ -191,30 +193,37 @@ static int get_family_id(int sd) return id; } +#define average_ms(t, c) (t / 1000000ULL / (c ? c : 1)) + static void print_delayacct(struct taskstats *t) { - printf("\n\nCPU %15s%15s%15s%15s\n" - " %15llu%15llu%15llu%15llu\n" - "IO %15s%15s\n" - " %15llu%15llu\n" - "SWAP %15s%15s\n" - " %15llu%15llu\n" - "RECLAIM %12s%15s\n" - " %15llu%15llu\n", - "count", "real total", "virtual total", "delay total", + printf("\n\nCPU %15s%15s%15s%15s%15s\n" + " %15llu%15llu%15llu%15llu%15.3fms\n" + "IO %15s%15s%15s\n" + " %15llu%15llu%15llums\n" + "SWAP %15s%15s%15s\n" + " %15llu%15llu%15llums\n" + "RECLAIM %12s%15s%15s\n" + " %15llu%15llu%15llums\n", + "count", "real total", "virtual total", + "delay total", "delay average", (unsigned long long)t->cpu_count, (unsigned long long)t->cpu_run_real_total, (unsigned long long)t->cpu_run_virtual_total, (unsigned long long)t->cpu_delay_total, - "count", "delay total", + average_ms((double)t->cpu_delay_total, t->cpu_count), + "count", "delay total", "delay average", (unsigned long long)t->blkio_count, (unsigned long long)t->blkio_delay_total, - "count", "delay total", + average_ms(t->blkio_delay_total, t->blkio_count), + "count", "delay total", "delay average", (unsigned long long)t->swapin_count, (unsigned long long)t->swapin_delay_total, - "count", "delay total", + average_ms(t->swapin_delay_total, t->swapin_count), + "count", "delay total", "delay average", (unsigned long long)t->freepages_count, - (unsigned long long)t->freepages_delay_total); + (unsigned long long)t->freepages_delay_total, + average_ms(t->freepages_delay_total, t->freepages_count)); } static void task_context_switch_counts(struct taskstats *t) @@ -433,8 +442,6 @@ int main(int argc, char *argv[]) } do { - int i; - rep_len = recv(nl_sd, &msg, sizeof(msg), 0); PRINTF("received %d bytes\n", rep_len); @@ -459,7 +466,6 @@ int main(int argc, char *argv[]) na = (struct nlattr *) GENLMSG_DATA(&msg); len = 0; - i = 0; while (len < rep_len) { len += NLA_ALIGN(na->nla_len); switch (na->nla_type) { diff --git a/Documentation/acpi/method-customizing.txt b/Documentation/acpi/method-customizing.txt index 3e1d25aee3fb..5f55373dd53b 100644 --- a/Documentation/acpi/method-customizing.txt +++ b/Documentation/acpi/method-customizing.txt @@ -66,3 +66,8 @@ Note: We can use a kernel with multiple custom ACPI method running, But each individual write to debugfs can implement a SINGLE method override. i.e. if we want to insert/override multiple ACPI methods, we need to redo step c) ~ g) for multiple times. + +Note: Be aware that root can mis-use this driver to modify arbitrary + memory and gain additional rights, if root's privileges got + restricted (for example if root is not allowed to load additional + modules after boot). diff --git a/Documentation/arm/Booting b/Documentation/arm/Booting index 76850295af8f..4e686a2ed91e 100644 --- a/Documentation/arm/Booting +++ b/Documentation/arm/Booting @@ -65,13 +65,19 @@ looks at the connected hardware is beyond the scope of this document. The boot loader must ultimately be able to provide a MACH_TYPE_xxx value to the kernel. (see linux/arch/arm/tools/mach-types). - -4. Setup the kernel tagged list -------------------------------- +4. Setup boot data +------------------ Existing boot loaders: OPTIONAL, HIGHLY RECOMMENDED New boot loaders: MANDATORY +The boot loader must provide either a tagged list or a dtb image for +passing configuration data to the kernel. The physical address of the +boot data is passed to the kernel in register r2. + +4a. Setup the kernel tagged list +-------------------------------- + The boot loader must create and initialise the kernel tagged list. A valid tagged list starts with ATAG_CORE and ends with ATAG_NONE. The ATAG_CORE tag may or may not be empty. An empty ATAG_CORE tag @@ -101,6 +107,24 @@ The tagged list must be placed in a region of memory where neither the kernel decompressor nor initrd 'bootp' program will overwrite it. The recommended placement is in the first 16KiB of RAM. +4b. Setup the device tree +------------------------- + +The boot loader must load a device tree image (dtb) into system ram +at a 64bit aligned address and initialize it with the boot data. The +dtb format is documented in Documentation/devicetree/booting-without-of.txt. +The kernel will look for the dtb magic value of 0xd00dfeed at the dtb +physical address to determine if a dtb has been passed instead of a +tagged list. + +The boot loader must pass at a minimum the size and location of the +system memory, and the root filesystem location. The dtb must be +placed in a region of memory where the kernel decompressor will not +overwrite it. The recommended placement is in the first 16KiB of RAM +with the caveat that it may not be located at physical address 0 since +the kernel interprets a value of 0 in r2 to mean neither a tagged list +nor a dtb were passed. + 5. Calling the kernel image --------------------------- @@ -125,7 +149,8 @@ In either case, the following conditions must be met: - CPU register settings r0 = 0, r1 = machine type number discovered in (3) above. - r2 = physical address of tagged list in system RAM. + r2 = physical address of tagged list in system RAM, or + physical address of device tree block (dtb) in system RAM - CPU mode All forms of interrupts must be disabled (IRQs and FIQs) diff --git a/Documentation/arm/Samsung/Overview.txt b/Documentation/arm/Samsung/Overview.txt index c3094ea51aa7..658abb258cef 100644 --- a/Documentation/arm/Samsung/Overview.txt +++ b/Documentation/arm/Samsung/Overview.txt @@ -14,7 +14,6 @@ Introduction - S3C24XX: See Documentation/arm/Samsung-S3C24XX/Overview.txt for full list - S3C64XX: S3C6400 and S3C6410 - S5P6440 - - S5P6442 - S5PC100 - S5PC110 / S5PV210 @@ -36,7 +35,6 @@ Configuration unifying all the SoCs into one kernel. s5p6440_defconfig - S5P6440 specific default configuration - s5p6442_defconfig - S5P6442 specific default configuration s5pc100_defconfig - S5PC100 specific default configuration s5pc110_defconfig - S5PC110 specific default configuration s5pv210_defconfig - S5PV210 specific default configuration diff --git a/Documentation/atomic_ops.txt b/Documentation/atomic_ops.txt index ac4d47187122..3bd585b44927 100644 --- a/Documentation/atomic_ops.txt +++ b/Documentation/atomic_ops.txt @@ -12,7 +12,7 @@ Also, it should be made opaque such that any kind of cast to a normal C integer type will fail. Something like the following should suffice: - typedef struct { volatile int counter; } atomic_t; + typedef struct { int counter; } atomic_t; Historically, counter has been declared volatile. This is now discouraged. See Documentation/volatile-considered-harmful.txt for the complete rationale. diff --git a/Documentation/blockdev/cciss.txt b/Documentation/blockdev/cciss.txt index 89698e8df7d4..c00c6a5ab21f 100644 --- a/Documentation/blockdev/cciss.txt +++ b/Documentation/blockdev/cciss.txt @@ -169,3 +169,18 @@ is issued which positions the tape to a known position. Typically you must rewind the tape (by issuing "mt -f /dev/st0 rewind" for example) before i/o can proceed again to a tape drive which was reset. +There is a cciss_tape_cmds module parameter which can be used to make cciss +allocate more commands for use by tape drives. Ordinarily only a few commands +(6) are allocated for tape drives because tape drives are slow and +infrequently used and the primary purpose of Smart Array controllers is to +act as a RAID controller for disk drives, so the vast majority of commands +are allocated for disk devices. However, if you have more than a few tape +drives attached to a smart array, the default number of commands may not be +enought (for example, if you have 8 tape drives, you could only rewind 6 +at one time with the default number of commands.) The cciss_tape_cmds module +parameter allows more commands (up to 16 more) to be allocated for use by +tape drives. For example: + + insmod cciss.ko cciss_tape_cmds=16 + +Or, as a kernel boot parameter passed in via grub: cciss.cciss_tape_cmds=8 diff --git a/Documentation/cachetlb.txt b/Documentation/cachetlb.txt index 9164ae3b83bc..9b728dc17535 100644 --- a/Documentation/cachetlb.txt +++ b/Documentation/cachetlb.txt @@ -16,7 +16,7 @@ on all processors in the system. Don't let this scare you into thinking SMP cache/tlb flushing must be so inefficient, this is in fact an area where many optimizations are possible. For example, if it can be proven that a user address space has never executed -on a cpu (see vma->cpu_vm_mask), one need not perform a flush +on a cpu (see mm_cpumask()), one need not perform a flush for this address space on that cpu. First, the TLB flushing interfaces, since they are the simplest. The diff --git a/Documentation/cgroups/blkio-controller.txt b/Documentation/cgroups/blkio-controller.txt index 465351d4cf85..cd45c8ea7463 100644 --- a/Documentation/cgroups/blkio-controller.txt +++ b/Documentation/cgroups/blkio-controller.txt @@ -28,16 +28,19 @@ cgroups. Here is what you can do. - Enable group scheduling in CFQ CONFIG_CFQ_GROUP_IOSCHED=y -- Compile and boot into kernel and mount IO controller (blkio). +- Compile and boot into kernel and mount IO controller (blkio); see + cgroups.txt, Why are cgroups needed?. - mount -t cgroup -o blkio none /cgroup + mount -t tmpfs cgroup_root /sys/fs/cgroup + mkdir /sys/fs/cgroup/blkio + mount -t cgroup -o blkio none /sys/fs/cgroup/blkio - Create two cgroups - mkdir -p /cgroup/test1/ /cgroup/test2 + mkdir -p /sys/fs/cgroup/blkio/test1/ /sys/fs/cgroup/blkio/test2 - Set weights of group test1 and test2 - echo 1000 > /cgroup/test1/blkio.weight - echo 500 > /cgroup/test2/blkio.weight + echo 1000 > /sys/fs/cgroup/blkio/test1/blkio.weight + echo 500 > /sys/fs/cgroup/blkio/test2/blkio.weight - Create two same size files (say 512MB each) on same disk (file1, file2) and launch two dd threads in different cgroup to read those files. @@ -46,12 +49,12 @@ cgroups. Here is what you can do. echo 3 > /proc/sys/vm/drop_caches dd if=/mnt/sdb/zerofile1 of=/dev/null & - echo $! > /cgroup/test1/tasks - cat /cgroup/test1/tasks + echo $! > /sys/fs/cgroup/blkio/test1/tasks + cat /sys/fs/cgroup/blkio/test1/tasks dd if=/mnt/sdb/zerofile2 of=/dev/null & - echo $! > /cgroup/test2/tasks - cat /cgroup/test2/tasks + echo $! > /sys/fs/cgroup/blkio/test2/tasks + cat /sys/fs/cgroup/blkio/test2/tasks - At macro level, first dd should finish first. To get more precise data, keep on looking at (with the help of script), at blkio.disk_time and @@ -68,13 +71,13 @@ Throttling/Upper Limit policy - Enable throttling in block layer CONFIG_BLK_DEV_THROTTLING=y -- Mount blkio controller - mount -t cgroup -o blkio none /cgroup/blkio +- Mount blkio controller (see cgroups.txt, Why are cgroups needed?) + mount -t cgroup -o blkio none /sys/fs/cgroup/blkio - Specify a bandwidth rate on particular device for root group. The format for policy is ": ". - echo "8:16 1048576" > /cgroup/blkio/blkio.read_bps_device + echo "8:16 1048576" > /sys/fs/cgroup/blkio/blkio.read_bps_device Above will put a limit of 1MB/second on reads happening for root group on device having major/minor number 8:16. @@ -108,7 +111,7 @@ Hierarchical Cgroups CFQ and throttling will practically treat all groups at same level. pivot - / | \ \ + / / \ \ root test1 test2 test3 Down the line we can implement hierarchical accounting/control support @@ -149,7 +152,7 @@ Proportional weight policy files Following is the format. - #echo dev_maj:dev_minor weight > /path/to/cgroup/blkio.weight_device + # echo dev_maj:dev_minor weight > blkio.weight_device Configure weight=300 on /dev/sdb (8:16) in this cgroup # echo 8:16 300 > blkio.weight_device # cat blkio.weight_device diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt index aedf1bd02fdd..cd67e90003c0 100644 --- a/Documentation/cgroups/cgroups.txt +++ b/Documentation/cgroups/cgroups.txt @@ -138,11 +138,11 @@ With the ability to classify tasks differently for different resources the admin can easily set up a script which receives exec notifications and depending on who is launching the browser he can - # echo browser_pid > /mnt///tasks + # echo browser_pid > /sys/fs/cgroup///tasks With only a single hierarchy, he now would potentially have to create a separate cgroup for every browser launched and associate it with -approp network and other resource class. This may lead to +appropriate network and other resource class. This may lead to proliferation of such cgroups. Also lets say that the administrator would like to give enhanced network @@ -153,9 +153,9 @@ apps enhanced CPU power, With ability to write pids directly to resource classes, it's just a matter of : - # echo pid > /mnt/network//tasks + # echo pid > /sys/fs/cgroup/network//tasks (after some time) - # echo pid > /mnt/network//tasks + # echo pid > /sys/fs/cgroup/network//tasks Without this ability, he would have to split the cgroup into multiple separate ones and then associate the new cgroups with the @@ -236,7 +236,8 @@ containing the following files describing that cgroup: - cgroup.procs: list of tgids in the cgroup. This list is not guaranteed to be sorted or free of duplicate tgids, and userspace should sort/uniquify the list if this property is required. - This is a read-only file, for now. + Writing a thread group id into this file moves all threads in that + group into this cgroup. - notify_on_release flag: run the release agent on exit? - release_agent: the path to use for release notifications (this file exists in the top cgroup only) @@ -309,21 +310,24 @@ subsystem, this is the case for the cpuset. To start a new job that is to be contained within a cgroup, using the "cpuset" cgroup subsystem, the steps are something like: - 1) mkdir /dev/cgroup - 2) mount -t cgroup -ocpuset cpuset /dev/cgroup - 3) Create the new cgroup by doing mkdir's and write's (or echo's) in - the /dev/cgroup virtual file system. - 4) Start a task that will be the "founding father" of the new job. - 5) Attach that task to the new cgroup by writing its pid to the - /dev/cgroup tasks file for that cgroup. - 6) fork, exec or clone the job tasks from this founding father task. + 1) mount -t tmpfs cgroup_root /sys/fs/cgroup + 2) mkdir /sys/fs/cgroup/cpuset + 3) mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset + 4) Create the new cgroup by doing mkdir's and write's (or echo's) in + the /sys/fs/cgroup virtual file system. + 5) Start a task that will be the "founding father" of the new job. + 6) Attach that task to the new cgroup by writing its pid to the + /sys/fs/cgroup/cpuset/tasks file for that cgroup. + 7) fork, exec or clone the job tasks from this founding father task. For example, the following sequence of commands will setup a cgroup named "Charlie", containing just CPUs 2 and 3, and Memory Node 1, and then start a subshell 'sh' in that cgroup: - mount -t cgroup cpuset -ocpuset /dev/cgroup - cd /dev/cgroup + mount -t tmpfs cgroup_root /sys/fs/cgroup + mkdir /sys/fs/cgroup/cpuset + mount -t cgroup cpuset -ocpuset /sys/fs/cgroup/cpuset + cd /sys/fs/cgroup/cpuset mkdir Charlie cd Charlie /bin/echo 2-3 > cpuset.cpus @@ -344,7 +348,7 @@ Creating, modifying, using the cgroups can be done through the cgroup virtual filesystem. To mount a cgroup hierarchy with all available subsystems, type: -# mount -t cgroup xxx /dev/cgroup +# mount -t cgroup xxx /sys/fs/cgroup The "xxx" is not interpreted by the cgroup code, but will appear in /proc/mounts so may be any useful identifying string that you like. @@ -353,23 +357,32 @@ Note: Some subsystems do not work without some user input first. For instance, if cpusets are enabled the user will have to populate the cpus and mems files for each new cgroup created before that group can be used. +As explained in section `1.2 Why are cgroups needed?' you should create +different hierarchies of cgroups for each single resource or group of +resources you want to control. Therefore, you should mount a tmpfs on +/sys/fs/cgroup and create directories for each cgroup resource or resource +group. + +# mount -t tmpfs cgroup_root /sys/fs/cgroup +# mkdir /sys/fs/cgroup/rg1 + To mount a cgroup hierarchy with just the cpuset and memory subsystems, type: -# mount -t cgroup -o cpuset,memory hier1 /dev/cgroup +# mount -t cgroup -o cpuset,memory hier1 /sys/fs/cgroup/rg1 To change the set of subsystems bound to a mounted hierarchy, just remount with different options: -# mount -o remount,cpuset,blkio hier1 /dev/cgroup +# mount -o remount,cpuset,blkio hier1 /sys/fs/cgroup/rg1 Now memory is removed from the hierarchy and blkio is added. Note this will add blkio to the hierarchy but won't remove memory or cpuset, because the new options are appended to the old ones: -# mount -o remount,blkio /dev/cgroup +# mount -o remount,blkio /sys/fs/cgroup/rg1 To Specify a hierarchy's release_agent: # mount -t cgroup -o cpuset,release_agent="/sbin/cpuset_release_agent" \ - xxx /dev/cgroup + xxx /sys/fs/cgroup/rg1 Note that specifying 'release_agent' more than once will return failure. @@ -378,17 +391,17 @@ when the hierarchy consists of a single (root) cgroup. Supporting the ability to arbitrarily bind/unbind subsystems from an existing cgroup hierarchy is intended to be implemented in the future. -Then under /dev/cgroup you can find a tree that corresponds to the -tree of the cgroups in the system. For instance, /dev/cgroup +Then under /sys/fs/cgroup/rg1 you can find a tree that corresponds to the +tree of the cgroups in the system. For instance, /sys/fs/cgroup/rg1 is the cgroup that holds the whole system. If you want to change the value of release_agent: -# echo "/sbin/new_release_agent" > /dev/cgroup/release_agent +# echo "/sbin/new_release_agent" > /sys/fs/cgroup/rg1/release_agent It can also be changed via remount. -If you want to create a new cgroup under /dev/cgroup: -# cd /dev/cgroup +If you want to create a new cgroup under /sys/fs/cgroup/rg1: +# cd /sys/fs/cgroup/rg1 # mkdir my_cgroup Now you want to do something with this cgroup. @@ -430,6 +443,12 @@ You can attach the current shell task by echoing 0: # echo 0 > tasks +You can use the cgroup.procs file instead of the tasks file to move all +threads in a threadgroup at once. Echoing the pid of any task in a +threadgroup to cgroup.procs causes all tasks in that threadgroup to be +be attached to the cgroup. Writing 0 to cgroup.procs moves all tasks +in the writing task's threadgroup. + Note: Since every task is always a member of exactly one cgroup in each mounted hierarchy, to remove a task from its current cgroup you must move it into a new cgroup (possibly the root cgroup) by writing to the @@ -575,7 +594,7 @@ rmdir() will fail with it. From this behavior, pre_destroy() can be called multiple times against a cgroup. int can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct task_struct *task, bool threadgroup) + struct task_struct *task) (cgroup_mutex held by caller) Called prior to moving a task into a cgroup; if the subsystem @@ -584,9 +603,14 @@ task is passed, then a successful result indicates that *any* unspecified task can be moved into the cgroup. Note that this isn't called on a fork. If this method returns 0 (success) then this should remain valid while the caller holds cgroup_mutex and it is ensured that either -attach() or cancel_attach() will be called in future. If threadgroup is -true, then a successful result indicates that all threads in the given -thread's threadgroup can be moved together. +attach() or cancel_attach() will be called in future. + +int can_attach_task(struct cgroup *cgrp, struct task_struct *tsk); +(cgroup_mutex held by caller) + +As can_attach, but for operations that must be run once per task to be +attached (possibly many when using cgroup_attach_proc). Called after +can_attach. void cancel_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, struct task_struct *task, bool threadgroup) @@ -598,15 +622,24 @@ function, so that the subsystem can implement a rollback. If not, not necessary. This will be called only about subsystems whose can_attach() operation have succeeded. +void pre_attach(struct cgroup *cgrp); +(cgroup_mutex held by caller) + +For any non-per-thread attachment work that needs to happen before +attach_task. Needed by cpuset. + void attach(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup *old_cgrp, struct task_struct *task, - bool threadgroup) + struct cgroup *old_cgrp, struct task_struct *task) (cgroup_mutex held by caller) Called after the task has been attached to the cgroup, to allow any post-attachment activity that requires memory allocations or blocking. -If threadgroup is true, the subsystem should take care of all threads -in the specified thread's threadgroup. Currently does not support any + +void attach_task(struct cgroup *cgrp, struct task_struct *tsk); +(cgroup_mutex held by caller) + +As attach, but for operations that must be run once per task to be attached, +like can_attach_task. Called before attach. Currently does not support any subsystem that might need the old_cgrp for every thread in the group. void fork(struct cgroup_subsy *ss, struct task_struct *task) @@ -630,7 +663,7 @@ always handled well. void post_clone(struct cgroup_subsys *ss, struct cgroup *cgrp) (cgroup_mutex held by caller) -Called at the end of cgroup_clone() to do any parameter +Called during cgroup_create() to do any parameter initialization which might be required before a task could attach. For example in cpusets, no task may attach before 'cpus' and 'mems' are set up. diff --git a/Documentation/cgroups/cpuacct.txt b/Documentation/cgroups/cpuacct.txt index 8b930946c52a..9ad85df4b983 100644 --- a/Documentation/cgroups/cpuacct.txt +++ b/Documentation/cgroups/cpuacct.txt @@ -10,26 +10,25 @@ directly present in its group. Accounting groups can be created by first mounting the cgroup filesystem. -# mkdir /cgroups -# mount -t cgroup -ocpuacct none /cgroups +# mount -t cgroup -ocpuacct none /sys/fs/cgroup -With the above step, the initial or the parent accounting group -becomes visible at /cgroups. At bootup, this group includes all the -tasks in the system. /cgroups/tasks lists the tasks in this cgroup. -/cgroups/cpuacct.usage gives the CPU time (in nanoseconds) obtained by -this group which is essentially the CPU time obtained by all the tasks +With the above step, the initial or the parent accounting group becomes +visible at /sys/fs/cgroup. At bootup, this group includes all the tasks in +the system. /sys/fs/cgroup/tasks lists the tasks in this cgroup. +/sys/fs/cgroup/cpuacct.usage gives the CPU time (in nanoseconds) obtained +by this group which is essentially the CPU time obtained by all the tasks in the system. -New accounting groups can be created under the parent group /cgroups. +New accounting groups can be created under the parent group /sys/fs/cgroup. -# cd /cgroups +# cd /sys/fs/cgroup # mkdir g1 # echo $$ > g1 The above steps create a new group g1 and move the current shell process (bash) into it. CPU time consumed by this bash and its children can be obtained from g1/cpuacct.usage and the same is accumulated in -/cgroups/cpuacct.usage also. +/sys/fs/cgroup/cpuacct.usage also. cpuacct.stat file lists a few statistics which further divide the CPU time obtained by the cgroup into user and system times. Currently diff --git a/Documentation/cgroups/cpusets.txt b/Documentation/cgroups/cpusets.txt index 98a30829af7a..5b0d78e55ccc 100644 --- a/Documentation/cgroups/cpusets.txt +++ b/Documentation/cgroups/cpusets.txt @@ -661,21 +661,21 @@ than stress the kernel. To start a new job that is to be contained within a cpuset, the steps are: - 1) mkdir /dev/cpuset - 2) mount -t cgroup -ocpuset cpuset /dev/cpuset + 1) mkdir /sys/fs/cgroup/cpuset + 2) mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset 3) Create the new cpuset by doing mkdir's and write's (or echo's) in - the /dev/cpuset virtual file system. + the /sys/fs/cgroup/cpuset virtual file system. 4) Start a task that will be the "founding father" of the new job. 5) Attach that task to the new cpuset by writing its pid to the - /dev/cpuset tasks file for that cpuset. + /sys/fs/cgroup/cpuset tasks file for that cpuset. 6) fork, exec or clone the job tasks from this founding father task. For example, the following sequence of commands will setup a cpuset named "Charlie", containing just CPUs 2 and 3, and Memory Node 1, and then start a subshell 'sh' in that cpuset: - mount -t cgroup -ocpuset cpuset /dev/cpuset - cd /dev/cpuset + mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset + cd /sys/fs/cgroup/cpuset mkdir Charlie cd Charlie /bin/echo 2-3 > cpuset.cpus @@ -710,14 +710,14 @@ Creating, modifying, using the cpusets can be done through the cpuset virtual filesystem. To mount it, type: -# mount -t cgroup -o cpuset cpuset /dev/cpuset +# mount -t cgroup -o cpuset cpuset /sys/fs/cgroup/cpuset -Then under /dev/cpuset you can find a tree that corresponds to the -tree of the cpusets in the system. For instance, /dev/cpuset +Then under /sys/fs/cgroup/cpuset you can find a tree that corresponds to the +tree of the cpusets in the system. For instance, /sys/fs/cgroup/cpuset is the cpuset that holds the whole system. -If you want to create a new cpuset under /dev/cpuset: -# cd /dev/cpuset +If you want to create a new cpuset under /sys/fs/cgroup/cpuset: +# cd /sys/fs/cgroup/cpuset # mkdir my_cpuset Now you want to do something with this cpuset. @@ -765,12 +765,12 @@ wrapper around the cgroup filesystem. The command -mount -t cpuset X /dev/cpuset +mount -t cpuset X /sys/fs/cgroup/cpuset is equivalent to -mount -t cgroup -ocpuset,noprefix X /dev/cpuset -echo "/sbin/cpuset_release_agent" > /dev/cpuset/release_agent +mount -t cgroup -ocpuset,noprefix X /sys/fs/cgroup/cpuset +echo "/sbin/cpuset_release_agent" > /sys/fs/cgroup/cpuset/release_agent 2.2 Adding/removing cpus ------------------------ diff --git a/Documentation/cgroups/devices.txt b/Documentation/cgroups/devices.txt index 57ca4c89fe5c..16624a7f8222 100644 --- a/Documentation/cgroups/devices.txt +++ b/Documentation/cgroups/devices.txt @@ -22,16 +22,16 @@ removed from the child(ren). An entry is added using devices.allow, and removed using devices.deny. For instance - echo 'c 1:3 mr' > /cgroups/1/devices.allow + echo 'c 1:3 mr' > /sys/fs/cgroup/1/devices.allow allows cgroup 1 to read and mknod the device usually known as /dev/null. Doing - echo a > /cgroups/1/devices.deny + echo a > /sys/fs/cgroup/1/devices.deny will remove the default 'a *:* rwm' entry. Doing - echo a > /cgroups/1/devices.allow + echo a > /sys/fs/cgroup/1/devices.allow will add the 'a *:* rwm' entry to the whitelist. diff --git a/Documentation/cgroups/freezer-subsystem.txt b/Documentation/cgroups/freezer-subsystem.txt index 41f37fea1276..c21d77742a07 100644 --- a/Documentation/cgroups/freezer-subsystem.txt +++ b/Documentation/cgroups/freezer-subsystem.txt @@ -59,28 +59,28 @@ is non-freezable. * Examples of usage : - # mkdir /containers - # mount -t cgroup -ofreezer freezer /containers - # mkdir /containers/0 - # echo $some_pid > /containers/0/tasks + # mkdir /sys/fs/cgroup/freezer + # mount -t cgroup -ofreezer freezer /sys/fs/cgroup/freezer + # mkdir /sys/fs/cgroup/freezer/0 + # echo $some_pid > /sys/fs/cgroup/freezer/0/tasks to get status of the freezer subsystem : - # cat /containers/0/freezer.state + # cat /sys/fs/cgroup/freezer/0/freezer.state THAWED to freeze all tasks in the container : - # echo FROZEN > /containers/0/freezer.state - # cat /containers/0/freezer.state + # echo FROZEN > /sys/fs/cgroup/freezer/0/freezer.state + # cat /sys/fs/cgroup/freezer/0/freezer.state FREEZING - # cat /containers/0/freezer.state + # cat /sys/fs/cgroup/freezer/0/freezer.state FROZEN to unfreeze all tasks in the container : - # echo THAWED > /containers/0/freezer.state - # cat /containers/0/freezer.state + # echo THAWED > /sys/fs/cgroup/freezer/0/freezer.state + # cat /sys/fs/cgroup/freezer/0/freezer.state THAWED This is the basic mechanism which should do the right thing for user space task diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt index b6ed61c95856..06eb6d957c83 100644 --- a/Documentation/cgroups/memory.txt +++ b/Documentation/cgroups/memory.txt @@ -1,8 +1,8 @@ Memory Resource Controller -NOTE: The Memory Resource Controller has been generically been referred - to as the memory controller in this document. Do not confuse memory - controller used here with the memory controller that is used in hardware. +NOTE: The Memory Resource Controller has generically been referred to as the + memory controller in this document. Do not confuse memory controller + used here with the memory controller that is used in hardware. (For editors) In this document: @@ -52,8 +52,10 @@ Brief summary of control files. tasks # attach a task(thread) and show list of threads cgroup.procs # show list of processes cgroup.event_control # an interface for event_fd() - memory.usage_in_bytes # show current memory(RSS+Cache) usage. - memory.memsw.usage_in_bytes # show current memory+Swap usage + memory.usage_in_bytes # show current res_counter usage for memory + (See 5.5 for details) + memory.memsw.usage_in_bytes # show current res_counter usage for memory+Swap + (See 5.5 for details) memory.limit_in_bytes # set/show limit of memory usage memory.memsw.limit_in_bytes # set/show limit of memory+Swap usage memory.failcnt # show the number of memory usage hits limits @@ -68,6 +70,7 @@ Brief summary of control files. (See sysctl's vm.swappiness) memory.move_charge_at_immigrate # set/show controls of moving charges memory.oom_control # set/show oom controls. + memory.numa_stat # show the number of memory usage per numa node 1. History @@ -179,7 +182,7 @@ behind this approach is that a cgroup that aggressively uses a shared page will eventually get charged for it (once it is uncharged from the cgroup that brought it in -- this will happen on memory pressure). -Exception: If CONFIG_CGROUP_CGROUP_MEM_RES_CTLR_SWAP is not used.. +Exception: If CONFIG_CGROUP_CGROUP_MEM_RES_CTLR_SWAP is not used. When you do swapoff and make swapped-out pages of shmem(tmpfs) to be backed into memory in force, charges for pages are accounted against the caller of swapoff rather than the users of shmem. @@ -211,7 +214,7 @@ affecting global LRU, memory+swap limit is better than just limiting swap from OS point of view. * What happens when a cgroup hits memory.memsw.limit_in_bytes -When a cgroup his memory.memsw.limit_in_bytes, it's useless to do swap-out +When a cgroup hits memory.memsw.limit_in_bytes, it's useless to do swap-out in this cgroup. Then, swap-out will not be done by cgroup routine and file caches are dropped. But as mentioned above, global LRU can do swapout memory from it for sanity of the system's memory management state. You can't forbid @@ -261,16 +264,17 @@ b. Enable CONFIG_RESOURCE_COUNTERS c. Enable CONFIG_CGROUP_MEM_RES_CTLR d. Enable CONFIG_CGROUP_MEM_RES_CTLR_SWAP (to use swap extension) -1. Prepare the cgroups -# mkdir -p /cgroups -# mount -t cgroup none /cgroups -o memory +1. Prepare the cgroups (see cgroups.txt, Why are cgroups needed?) +# mount -t tmpfs none /sys/fs/cgroup +# mkdir /sys/fs/cgroup/memory +# mount -t cgroup none /sys/fs/cgroup/memory -o memory 2. Make the new group and move bash into it -# mkdir /cgroups/0 -# echo $$ > /cgroups/0/tasks +# mkdir /sys/fs/cgroup/memory/0 +# echo $$ > /sys/fs/cgroup/memory/0/tasks Since now we're in the 0 cgroup, we can alter the memory limit: -# echo 4M > /cgroups/0/memory.limit_in_bytes +# echo 4M > /sys/fs/cgroup/memory/0/memory.limit_in_bytes NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo, mega or gigabytes. (Here, Kilo, Mega, Giga are Kibibytes, Mebibytes, Gibibytes.) @@ -278,11 +282,11 @@ mega or gigabytes. (Here, Kilo, Mega, Giga are Kibibytes, Mebibytes, Gibibytes.) NOTE: We can write "-1" to reset the *.limit_in_bytes(unlimited). NOTE: We cannot set limits on the root cgroup any more. -# cat /cgroups/0/memory.limit_in_bytes +# cat /sys/fs/cgroup/memory/0/memory.limit_in_bytes 4194304 We can check the usage: -# cat /cgroups/0/memory.usage_in_bytes +# cat /sys/fs/cgroup/memory/0/memory.usage_in_bytes 1216512 A successful write to this file does not guarantee a successful set of @@ -453,6 +457,33 @@ memory under it will be reclaimed. You can reset failcnt by writing 0 to failcnt file. # echo 0 > .../memory.failcnt +5.5 usage_in_bytes + +For efficiency, as other kernel components, memory cgroup uses some optimization +to avoid unnecessary cacheline false sharing. usage_in_bytes is affected by the +method and doesn't show 'exact' value of memory(and swap) usage, it's an fuzz +value for efficient access. (Of course, when necessary, it's synchronized.) +If you want to know more exact memory usage, you should use RSS+CACHE(+SWAP) +value in memory.stat(see 5.2). + +5.6 numa_stat + +This is similar to numa_maps but operates on a per-memcg basis. This is +useful for providing visibility into the numa locality information within +an memcg since the pages are allowed to be allocated from any physical +node. One of the usecases is evaluating application performance by +combining this information with the application's cpu allocation. + +We export "total", "file", "anon" and "unevictable" pages per-node for +each memcg. The ouput format of memory.numa_stat is: + +total= N0= N1= ... +file= N0= N1= ... +anon= N0= N1= ... +unevictable= N0= N1= ... + +And we have total = file + anon + unevictable. + 6. Hierarchy support The memory controller supports a deep hierarchy and hierarchical accounting. @@ -460,13 +491,13 @@ The hierarchy is created by creating the appropriate cgroups in the cgroup filesystem. Consider for example, the following cgroup filesystem hierarchy - root + root / | \ - / | \ - a b c - | \ - | \ - d e + / | \ + a b c + | \ + | \ + d e In the diagram above, with hierarchical accounting enabled, all memory usage of e, is accounted to its ancestors up until the root (i.e, c and root), diff --git a/Documentation/credentials.txt b/Documentation/credentials.txt deleted file mode 100644 index 995baf379c07..000000000000 --- a/Documentation/credentials.txt +++ /dev/null @@ -1,581 +0,0 @@ - ==================== - CREDENTIALS IN LINUX - ==================== - -By: David Howells - -Contents: - - (*) Overview. - - (*) Types of credentials. - - (*) File markings. - - (*) Task credentials. - - - Immutable credentials. - - Accessing task credentials. - - Accessing another task's credentials. - - Altering credentials. - - Managing credentials. - - (*) Open file credentials. - - (*) Overriding the VFS's use of credentials. - - -======== -OVERVIEW -======== - -There are several parts to the security check performed by Linux when one -object acts upon another: - - (1) Objects. - - Objects are things in the system that may be acted upon directly by - userspace programs. Linux has a variety of actionable objects, including: - - - Tasks - - Files/inodes - - Sockets - - Message queues - - Shared memory segments - - Semaphores - - Keys - - As a part of the description of all these objects there is a set of - credentials. What's in the set depends on the type of object. - - (2) Object ownership. - - Amongst the credentials of most objects, there will be a subset that - indicates the ownership of that object. This is used for resource - accounting and limitation (disk quotas and task rlimits for example). - - In a standard UNIX filesystem, for instance, this will be defined by the - UID marked on the inode. - - (3) The objective context. - - Also amongst the credentials of those objects, there will be a subset that - indicates the 'objective context' of that object. This may or may not be - the same set as in (2) - in standard UNIX files, for instance, this is the - defined by the UID and the GID marked on the inode. - - The objective context is used as part of the security calculation that is - carried out when an object is acted upon. - - (4) Subjects. - - A subject is an object that is acting upon another object. - - Most of the objects in the system are inactive: they don't act on other - objects within the system. Processes/tasks are the obvious exception: - they do stuff; they access and manipulate things. - - Objects other than tasks may under some circumstances also be subjects. - For instance an open file may send SIGIO to a task using the UID and EUID - given to it by a task that called fcntl(F_SETOWN) upon it. In this case, - the file struct will have a subjective context too. - - (5) The subjective context. - - A subject has an additional interpretation of its credentials. A subset - of its credentials forms the 'subjective context'. The subjective context - is used as part of the security calculation that is carried out when a - subject acts. - - A Linux task, for example, has the FSUID, FSGID and the supplementary - group list for when it is acting upon a file - which are quite separate - from the real UID and GID that normally form the objective context of the - task. - - (6) Actions. - - Linux has a number of actions available that a subject may perform upon an - object. The set of actions available depends on the nature of the subject - and the object. - - Actions include reading, writing, creating and deleting files; forking or - signalling and tracing tasks. - - (7) Rules, access control lists and security calculations. - - When a subject acts upon an object, a security calculation is made. This - involves taking the subjective context, the objective context and the - action, and searching one or more sets of rules to see whether the subject - is granted or denied permission to act in the desired manner on the - object, given those contexts. - - There are two main sources of rules: - - (a) Discretionary access control (DAC): - - Sometimes the object will include sets of rules as part of its - description. This is an 'Access Control List' or 'ACL'. A Linux - file may supply more than one ACL. - - A traditional UNIX file, for example, includes a permissions mask that - is an abbreviated ACL with three fixed classes of subject ('user', - 'group' and 'other'), each of which may be granted certain privileges - ('read', 'write' and 'execute' - whatever those map to for the object - in question). UNIX file permissions do not allow the arbitrary - specification of subjects, however, and so are of limited use. - - A Linux file might also sport a POSIX ACL. This is a list of rules - that grants various permissions to arbitrary subjects. - - (b) Mandatory access control (MAC): - - The system as a whole may have one or more sets of rules that get - applied to all subjects and objects, regardless of their source. - SELinux and Smack are examples of this. - - In the case of SELinux and Smack, each object is given a label as part - of its credentials. When an action is requested, they take the - subject label, the object label and the action and look for a rule - that says that this action is either granted or denied. - - -==================== -TYPES OF CREDENTIALS -==================== - -The Linux kernel supports the following types of credentials: - - (1) Traditional UNIX credentials. - - Real User ID - Real Group ID - - The UID and GID are carried by most, if not all, Linux objects, even if in - some cases it has to be invented (FAT or CIFS files for example, which are - derived from Windows). These (mostly) define the objective context of - that object, with tasks being slightly different in some cases. - - Effective, Saved and FS User ID - Effective, Saved and FS Group ID - Supplementary groups - - These are additional credentials used by tasks only. Usually, an - EUID/EGID/GROUPS will be used as the subjective context, and real UID/GID - will be used as the objective. For tasks, it should be noted that this is - not always true. - - (2) Capabilities. - - Set of permitted capabilities - Set of inheritable capabilities - Set of effective capabilities - Capability bounding set - - These are only carried by tasks. They indicate superior capabilities - granted piecemeal to a task that an ordinary task wouldn't otherwise have. - These are manipulated implicitly by changes to the traditional UNIX - credentials, but can also be manipulated directly by the capset() system - call. - - The permitted capabilities are those caps that the process might grant - itself to its effective or permitted sets through capset(). This - inheritable set might also be so constrained. - - The effective capabilities are the ones that a task is actually allowed to - make use of itself. - - The inheritable capabilities are the ones that may get passed across - execve(). - - The bounding set limits the capabilities that may be inherited across - execve(), especially when a binary is executed that will execute as UID 0. - - (3) Secure management flags (securebits). - - These are only carried by tasks. These govern the way the above - credentials are manipulated and inherited over certain operations such as - execve(). They aren't used directly as objective or subjective - credentials. - - (4) Keys and keyrings. - - These are only carried by tasks. They carry and cache security tokens - that don't fit into the other standard UNIX credentials. They are for - making such things as network filesystem keys available to the file - accesses performed by processes, without the necessity of ordinary - programs having to know about security details involved. - - Keyrings are a special type of key. They carry sets of other keys and can - be searched for the desired key. Each process may subscribe to a number - of keyrings: - - Per-thread keying - Per-process keyring - Per-session keyring - - When a process accesses a key, if not already present, it will normally be - cached on one of these keyrings for future accesses to find. - - For more information on using keys, see Documentation/keys.txt. - - (5) LSM - - The Linux Security Module allows extra controls to be placed over the - operations that a task may do. Currently Linux supports two main - alternate LSM options: SELinux and Smack. - - Both work by labelling the objects in a system and then applying sets of - rules (policies) that say what operations a task with one label may do to - an object with another label. - - (6) AF_KEY - - This is a socket-based approach to credential management for networking - stacks [RFC 2367]. It isn't discussed by this document as it doesn't - interact directly with task and file credentials; rather it keeps system - level credentials. - - -When a file is opened, part of the opening task's subjective context is -recorded in the file struct created. This allows operations using that file -struct to use those credentials instead of the subjective context of the task -that issued the operation. An example of this would be a file opened on a -network filesystem where the credentials of the opened file should be presented -to the server, regardless of who is actually doing a read or a write upon it. - - -============= -FILE MARKINGS -============= - -Files on disk or obtained over the network may have annotations that form the -objective security context of that file. Depending on the type of filesystem, -this may include one or more of the following: - - (*) UNIX UID, GID, mode; - - (*) Windows user ID; - - (*) Access control list; - - (*) LSM security label; - - (*) UNIX exec privilege escalation bits (SUID/SGID); - - (*) File capabilities exec privilege escalation bits. - -These are compared to the task's subjective security context, and certain -operations allowed or disallowed as a result. In the case of execve(), the -privilege escalation bits come into play, and may allow the resulting process -extra privileges, based on the annotations on the executable file. - - -================ -TASK CREDENTIALS -================ - -In Linux, all of a task's credentials are held in (uid, gid) or through -(groups, keys, LSM security) a refcounted structure of type 'struct cred'. -Each task points to its credentials by a pointer called 'cred' in its -task_struct. - -Once a set of credentials has been prepared and committed, it may not be -changed, barring the following exceptions: - - (1) its reference count may be changed; - - (2) the reference count on the group_info struct it points to may be changed; - - (3) the reference count on the security data it points to may be changed; - - (4) the reference count on any keyrings it points to may be changed; - - (5) any keyrings it points to may be revoked, expired or have their security - attributes changed; and - - (6) the contents of any keyrings to which it points may be changed (the whole - point of keyrings being a shared set of credentials, modifiable by anyone - with appropriate access). - -To alter anything in the cred struct, the copy-and-replace principle must be -adhered to. First take a copy, then alter the copy and then use RCU to change -the task pointer to make it point to the new copy. There are wrappers to aid -with this (see below). - -A task may only alter its _own_ credentials; it is no longer permitted for a -task to alter another's credentials. This means the capset() system call is no -longer permitted to take any PID other than the one of the current process. -Also keyctl_instantiate() and keyctl_negate() functions no longer permit -attachment to process-specific keyrings in the requesting process as the -instantiating process may need to create them. - - -IMMUTABLE CREDENTIALS ---------------------- - -Once a set of credentials has been made public (by calling commit_creds() for -example), it must be considered immutable, barring two exceptions: - - (1) The reference count may be altered. - - (2) Whilst the keyring subscriptions of a set of credentials may not be - changed, the keyrings subscribed to may have their contents altered. - -To catch accidental credential alteration at compile time, struct task_struct -has _const_ pointers to its credential sets, as does struct file. Furthermore, -certain functions such as get_cred() and put_cred() operate on const pointers, -thus rendering casts unnecessary, but require to temporarily ditch the const -qualification to be able to alter the reference count. - - -ACCESSING TASK CREDENTIALS --------------------------- - -A task being able to alter only its own credentials permits the current process -to read or replace its own credentials without the need for any form of locking -- which simplifies things greatly. It can just call: - - const struct cred *current_cred() - -to get a pointer to its credentials structure, and it doesn't have to release -it afterwards. - -There are convenience wrappers for retrieving specific aspects of a task's -credentials (the value is simply returned in each case): - - uid_t current_uid(void) Current's real UID - gid_t current_gid(void) Current's real GID - uid_t current_euid(void) Current's effective UID - gid_t current_egid(void) Current's effective GID - uid_t current_fsuid(void) Current's file access UID - gid_t current_fsgid(void) Current's file access GID - kernel_cap_t current_cap(void) Current's effective capabilities - void *current_security(void) Current's LSM security pointer - struct user_struct *current_user(void) Current's user account - -There are also convenience wrappers for retrieving specific associated pairs of -a task's credentials: - - void current_uid_gid(uid_t *, gid_t *); - void current_euid_egid(uid_t *, gid_t *); - void current_fsuid_fsgid(uid_t *, gid_t *); - -which return these pairs of values through their arguments after retrieving -them from the current task's credentials. - - -In addition, there is a function for obtaining a reference on the current -process's current set of credentials: - - const struct cred *get_current_cred(void); - -and functions for getting references to one of the credentials that don't -actually live in struct cred: - - struct user_struct *get_current_user(void); - struct group_info *get_current_groups(void); - -which get references to the current process's user accounting structure and -supplementary groups list respectively. - -Once a reference has been obtained, it must be released with put_cred(), -free_uid() or put_group_info() as appropriate. - - -ACCESSING ANOTHER TASK'S CREDENTIALS ------------------------------------- - -Whilst a task may access its own credentials without the need for locking, the -same is not true of a task wanting to access another task's credentials. It -must use the RCU read lock and rcu_dereference(). - -The rcu_dereference() is wrapped by: - - const struct cred *__task_cred(struct task_struct *task); - -This should be used inside the RCU read lock, as in the following example: - - void foo(struct task_struct *t, struct foo_data *f) - { - const struct cred *tcred; - ... - rcu_read_lock(); - tcred = __task_cred(t); - f->uid = tcred->uid; - f->gid = tcred->gid; - f->groups = get_group_info(tcred->groups); - rcu_read_unlock(); - ... - } - -Should it be necessary to hold another task's credentials for a long period of -time, and possibly to sleep whilst doing so, then the caller should get a -reference on them using: - - const struct cred *get_task_cred(struct task_struct *task); - -This does all the RCU magic inside of it. The caller must call put_cred() on -the credentials so obtained when they're finished with. - - [*] Note: The result of __task_cred() should not be passed directly to - get_cred() as this may race with commit_cred(). - -There are a couple of convenience functions to access bits of another task's -credentials, hiding the RCU magic from the caller: - - uid_t task_uid(task) Task's real UID - uid_t task_euid(task) Task's effective UID - -If the caller is holding the RCU read lock at the time anyway, then: - - __task_cred(task)->uid - __task_cred(task)->euid - -should be used instead. Similarly, if multiple aspects of a task's credentials -need to be accessed, RCU read lock should be used, __task_cred() called, the -result stored in a temporary pointer and then the credential aspects called -from that before dropping the lock. This prevents the potentially expensive -RCU magic from being invoked multiple times. - -Should some other single aspect of another task's credentials need to be -accessed, then this can be used: - - task_cred_xxx(task, member) - -where 'member' is a non-pointer member of the cred struct. For instance: - - uid_t task_cred_xxx(task, suid); - -will retrieve 'struct cred::suid' from the task, doing the appropriate RCU -magic. This may not be used for pointer members as what they point to may -disappear the moment the RCU read lock is dropped. - - -ALTERING CREDENTIALS --------------------- - -As previously mentioned, a task may only alter its own credentials, and may not -alter those of another task. This means that it doesn't need to use any -locking to alter its own credentials. - -To alter the current process's credentials, a function should first prepare a -new set of credentials by calling: - - struct cred *prepare_creds(void); - -this locks current->cred_replace_mutex and then allocates and constructs a -duplicate of the current process's credentials, returning with the mutex still -held if successful. It returns NULL if not successful (out of memory). - -The mutex prevents ptrace() from altering the ptrace state of a process whilst -security checks on credentials construction and changing is taking place as -the ptrace state may alter the outcome, particularly in the case of execve(). - -The new credentials set should be altered appropriately, and any security -checks and hooks done. Both the current and the proposed sets of credentials -are available for this purpose as current_cred() will return the current set -still at this point. - - -When the credential set is ready, it should be committed to the current process -by calling: - - int commit_creds(struct cred *new); - -This will alter various aspects of the credentials and the process, giving the -LSM a chance to do likewise, then it will use rcu_assign_pointer() to actually -commit the new credentials to current->cred, it will release -current->cred_replace_mutex to allow ptrace() to take place, and it will notify -the scheduler and others of the changes. - -This function is guaranteed to return 0, so that it can be tail-called at the -end of such functions as sys_setresuid(). - -Note that this function consumes the caller's reference to the new credentials. -The caller should _not_ call put_cred() on the new credentials afterwards. - -Furthermore, once this function has been called on a new set of credentials, -those credentials may _not_ be changed further. - - -Should the security checks fail or some other error occur after prepare_creds() -has been called, then the following function should be invoked: - - void abort_creds(struct cred *new); - -This releases the lock on current->cred_replace_mutex that prepare_creds() got -and then releases the new credentials. - - -A typical credentials alteration function would look something like this: - - int alter_suid(uid_t suid) - { - struct cred *new; - int ret; - - new = prepare_creds(); - if (!new) - return -ENOMEM; - - new->suid = suid; - ret = security_alter_suid(new); - if (ret < 0) { - abort_creds(new); - return ret; - } - - return commit_creds(new); - } - - -MANAGING CREDENTIALS --------------------- - -There are some functions to help manage credentials: - - (*) void put_cred(const struct cred *cred); - - This releases a reference to the given set of credentials. If the - reference count reaches zero, the credentials will be scheduled for - destruction by the RCU system. - - (*) const struct cred *get_cred(const struct cred *cred); - - This gets a reference on a live set of credentials, returning a pointer to - that set of credentials. - - (*) struct cred *get_new_cred(struct cred *cred); - - This gets a reference on a set of credentials that is under construction - and is thus still mutable, returning a pointer to that set of credentials. - - -===================== -OPEN FILE CREDENTIALS -===================== - -When a new file is opened, a reference is obtained on the opening task's -credentials and this is attached to the file struct as 'f_cred' in place of -'f_uid' and 'f_gid'. Code that used to access file->f_uid and file->f_gid -should now access file->f_cred->fsuid and file->f_cred->fsgid. - -It is safe to access f_cred without the use of RCU or locking because the -pointer will not change over the lifetime of the file struct, and nor will the -contents of the cred struct pointed to, barring the exceptions listed above -(see the Task Credentials section). - - -======================================= -OVERRIDING THE VFS'S USE OF CREDENTIALS -======================================= - -Under some circumstances it is desirable to override the credentials used by -the VFS, and that can be done by calling into such as vfs_mkdir() with a -different set of credentials. This is done in the following places: - - (*) sys_faccessat(). - - (*) do_coredump(). - - (*) nfs4recover.c. diff --git a/Documentation/devicetree/bindings/crypto/fsl-sec4.txt b/Documentation/devicetree/bindings/crypto/fsl-sec4.txt new file mode 100644 index 000000000000..bf57ecd5d73a --- /dev/null +++ b/Documentation/devicetree/bindings/crypto/fsl-sec4.txt @@ -0,0 +1,397 @@ +===================================================================== +SEC 4 Device Tree Binding +Copyright (C) 2008-2011 Freescale Semiconductor Inc. + + CONTENTS + -Overview + -SEC 4 Node + -Job Ring Node + -Run Time Integrity Check (RTIC) Node + -Run Time Integrity Check (RTIC) Memory Node + -Secure Non-Volatile Storage (SNVS) Node + -Full Example + +NOTE: the SEC 4 is also known as Freescale's Cryptographic Accelerator +Accelerator and Assurance Module (CAAM). + +===================================================================== +Overview + +DESCRIPTION + +SEC 4 h/w can process requests from 2 types of sources. +1. DPAA Queue Interface (HW interface between Queue Manager & SEC 4). +2. Job Rings (HW interface between cores & SEC 4 registers). + +High Speed Data Path Configuration: + +HW interface between QM & SEC 4 and also BM & SEC 4, on DPAA-enabled parts +such as the P4080. The number of simultaneous dequeues the QI can make is +equal to the number of Descriptor Controller (DECO) engines in a particular +SEC version. E.g., the SEC 4.0 in the P4080 has 5 DECOs and can thus +dequeue from 5 subportals simultaneously. + +Job Ring Data Path Configuration: + +Each JR is located on a separate 4k page, they may (or may not) be made visible +in the memory partition devoted to a particular core. The P4080 has 4 JRs, so +up to 4 JRs can be configured; and all 4 JRs process requests in parallel. + +===================================================================== +SEC 4 Node + +Description + + Node defines the base address of the SEC 4 block. + This block specifies the address range of all global + configuration registers for the SEC 4 block. It + also receives interrupts from the Run Time Integrity Check + (RTIC) function within the SEC 4 block. + +PROPERTIES + + - compatible + Usage: required + Value type: + Definition: Must include "fsl,sec-v4.0" + + - #address-cells + Usage: required + Value type: + Definition: A standard property. Defines the number of cells + for representing physical addresses in child nodes. + + - #size-cells + Usage: required + Value type: + Definition: A standard property. Defines the number of cells + for representing the size of physical addresses in + child nodes. + + - reg + Usage: required + Value type: + Definition: A standard property. Specifies the physical + address and length of the SEC4 configuration registers. + registers + + - ranges + Usage: required + Value type: + Definition: A standard property. Specifies the physical address + range of the SEC 4.0 register space (-SNVS not included). A + triplet that includes the child address, parent address, & + length. + + - interrupts + Usage: required + Value type: + Definition: Specifies the interrupts generated by this + device. The value of the interrupts property + consists of one interrupt specifier. The format + of the specifier is defined by the binding document + describing the node's interrupt parent. + + - interrupt-parent + Usage: (required if interrupt property is defined) + Value type: + Definition: A single value that points + to the interrupt parent to which the child domain + is being mapped. + + Note: All other standard properties (see the ePAPR) are allowed + but are optional. + + +EXAMPLE + crypto@300000 { + compatible = "fsl,sec-v4.0"; + #address-cells = <1>; + #size-cells = <1>; + reg = <0x300000 0x10000>; + ranges = <0 0x300000 0x10000>; + interrupt-parent = <&mpic>; + interrupts = <92 2>; + }; + +===================================================================== +Job Ring (JR) Node + + Child of the crypto node defines data processing interface to SEC 4 + across the peripheral bus for purposes of processing + cryptographic descriptors. The specified address + range can be made visible to one (or more) cores. + The interrupt defined for this node is controlled within + the address range of this node. + + - compatible + Usage: required + Value type: + Definition: Must include "fsl,sec-v4.0-job-ring" + + - reg + Usage: required + Value type: + Definition: Specifies a two JR parameters: an offset from + the parent physical address and the length the JR registers. + + - fsl,liodn + Usage: optional-but-recommended + Value type: + Definition: + Specifies the LIODN to be used in conjunction with + the ppid-to-liodn table that specifies the PPID to LIODN mapping. + Needed if the PAMU is used. Value is a 12 bit value + where value is a LIODN ID for this JR. This property is + normally set by boot firmware. + + - interrupts + Usage: required + Value type: + Definition: Specifies the interrupts generated by this + device. The value of the interrupts property + consists of one interrupt specifier. The format + of the specifier is defined by the binding document + describing the node's interrupt parent. + + - interrupt-parent + Usage: (required if interrupt property is defined) + Value type: + Definition: A single value that points + to the interrupt parent to which the child domain + is being mapped. + +EXAMPLE + jr@1000 { + compatible = "fsl,sec-v4.0-job-ring"; + reg = <0x1000 0x1000>; + fsl,liodn = <0x081>; + interrupt-parent = <&mpic>; + interrupts = <88 2>; + }; + + +===================================================================== +Run Time Integrity Check (RTIC) Node + + Child node of the crypto node. Defines a register space that + contains up to 5 sets of addresses and their lengths (sizes) that + will be checked at run time. After an initial hash result is + calculated, these addresses are checked by HW to monitor any + change. If any memory is modified, a Security Violation is + triggered (see SNVS definition). + + + - compatible + Usage: required + Value type: + Definition: Must include "fsl,sec-v4.0-rtic". + + - #address-cells + Usage: required + Value type: + Definition: A standard property. Defines the number of cells + for representing physical addresses in child nodes. Must + have a value of 1. + + - #size-cells + Usage: required + Value type: + Definition: A standard property. Defines the number of cells + for representing the size of physical addresses in + child nodes. Must have a value of 1. + + - reg + Usage: required + Value type: + Definition: A standard property. Specifies a two parameters: + an offset from the parent physical address and the length + the SEC4 registers. + + - ranges + Usage: required + Value type: + Definition: A standard property. Specifies the physical address + range of the SEC 4 register space (-SNVS not included). A + triplet that includes the child address, parent address, & + length. + +EXAMPLE + rtic@6000 { + compatible = "fsl,sec-v4.0-rtic"; + #address-cells = <1>; + #size-cells = <1>; + reg = <0x6000 0x100>; + ranges = <0x0 0x6100 0xe00>; + }; + +===================================================================== +Run Time Integrity Check (RTIC) Memory Node + A child node that defines individual RTIC memory regions that are used to + perform run-time integrity check of memory areas that should not modified. + The node defines a register that contains the memory address & + length (combined) and a second register that contains the hash result + in big endian format. + + - compatible + Usage: required + Value type: + Definition: Must include "fsl,sec-v4.0-rtic-memory". + + - reg + Usage: required + Value type: + Definition: A standard property. Specifies two parameters: + an offset from the parent physical address and the length: + + 1. The location of the RTIC memory address & length registers. + 2. The location RTIC hash result. + + - fsl,rtic-region + Usage: optional-but-recommended + Value type: + Definition: + Specifies the HW address (36 bit address) for this region + followed by the length of the HW partition to be checked; + the address is represented as a 64 bit quantity followed + by a 32 bit length. + + - fsl,liodn + Usage: optional-but-recommended + Value type: + Definition: + Specifies the LIODN to be used in conjunction with + the ppid-to-liodn table that specifies the PPID to LIODN + mapping. Needed if the PAMU is used. Value is a 12 bit value + where value is a LIODN ID for this RTIC memory region. This + property is normally set by boot firmware. + +EXAMPLE + rtic-a@0 { + compatible = "fsl,sec-v4.0-rtic-memory"; + reg = <0x00 0x20 0x100 0x80>; + fsl,liodn = <0x03c>; + fsl,rtic-region = <0x12345678 0x12345678 0x12345678>; + }; + +===================================================================== +Secure Non-Volatile Storage (SNVS) Node + + Node defines address range and the associated + interrupt for the SNVS function. This function + monitors security state information & reports + security violations. + + - compatible + Usage: required + Value type: + Definition: Must include "fsl,sec-v4.0-mon". + + - reg + Usage: required + Value type: + Definition: A standard property. Specifies the physical + address and length of the SEC4 configuration + registers. + + - interrupts + Usage: required + Value type: + Definition: Specifies the interrupts generated by this + device. The value of the interrupts property + consists of one interrupt specifier. The format + of the specifier is defined by the binding document + describing the node's interrupt parent. + + - interrupt-parent + Usage: (required if interrupt property is defined) + Value type: + Definition: A single value that points + to the interrupt parent to which the child domain + is being mapped. + +EXAMPLE + sec_mon@314000 { + compatible = "fsl,sec-v4.0-mon"; + reg = <0x314000 0x1000>; + interrupt-parent = <&mpic>; + interrupts = <93 2>; + }; + +===================================================================== +FULL EXAMPLE + + crypto: crypto@300000 { + compatible = "fsl,sec-v4.0"; + #address-cells = <1>; + #size-cells = <1>; + reg = <0x300000 0x10000>; + ranges = <0 0x300000 0x10000>; + interrupt-parent = <&mpic>; + interrupts = <92 2>; + + sec_jr0: jr@1000 { + compatible = "fsl,sec-v4.0-job-ring"; + reg = <0x1000 0x1000>; + interrupt-parent = <&mpic>; + interrupts = <88 2>; + }; + + sec_jr1: jr@2000 { + compatible = "fsl,sec-v4.0-job-ring"; + reg = <0x2000 0x1000>; + interrupt-parent = <&mpic>; + interrupts = <89 2>; + }; + + sec_jr2: jr@3000 { + compatible = "fsl,sec-v4.0-job-ring"; + reg = <0x3000 0x1000>; + interrupt-parent = <&mpic>; + interrupts = <90 2>; + }; + + sec_jr3: jr@4000 { + compatible = "fsl,sec-v4.0-job-ring"; + reg = <0x4000 0x1000>; + interrupt-parent = <&mpic>; + interrupts = <91 2>; + }; + + rtic@6000 { + compatible = "fsl,sec-v4.0-rtic"; + #address-cells = <1>; + #size-cells = <1>; + reg = <0x6000 0x100>; + ranges = <0x0 0x6100 0xe00>; + + rtic_a: rtic-a@0 { + compatible = "fsl,sec-v4.0-rtic-memory"; + reg = <0x00 0x20 0x100 0x80>; + }; + + rtic_b: rtic-b@20 { + compatible = "fsl,sec-v4.0-rtic-memory"; + reg = <0x20 0x20 0x200 0x80>; + }; + + rtic_c: rtic-c@40 { + compatible = "fsl,sec-v4.0-rtic-memory"; + reg = <0x40 0x20 0x300 0x80>; + }; + + rtic_d: rtic-d@60 { + compatible = "fsl,sec-v4.0-rtic-memory"; + reg = <0x60 0x20 0x500 0x80>; + }; + }; + }; + + sec_mon: sec_mon@314000 { + compatible = "fsl,sec-v4.0-mon"; + reg = <0x314000 0x1000>; + interrupt-parent = <&mpic>; + interrupts = <93 2>; + }; + +===================================================================== diff --git a/Documentation/devicetree/bindings/gpio/gpio_keys.txt b/Documentation/devicetree/bindings/gpio/gpio_keys.txt new file mode 100644 index 000000000000..7190c99d7611 --- /dev/null +++ b/Documentation/devicetree/bindings/gpio/gpio_keys.txt @@ -0,0 +1,36 @@ +Device-Tree bindings for input/gpio_keys.c keyboard driver + +Required properties: + - compatible = "gpio-keys"; + +Optional properties: + - autorepeat: Boolean, Enable auto repeat feature of Linux input + subsystem. + +Each button (key) is represented as a sub-node of "gpio-keys": +Subnode properties: + + - gpios: OF devcie-tree gpio specificatin. + - label: Descriptive name of the key. + - linux,code: Keycode to emit. + +Optional subnode-properties: + - linux,input-type: Specify event type this button/key generates. + If not specified defaults to <1> == EV_KEY. + - debounce-interval: Debouncing interval time in milliseconds. + If not specified defaults to 5. + - gpio-key,wakeup: Boolean, button can wake-up the system. + +Example nodes: + + gpio_keys { + compatible = "gpio-keys"; + #address-cells = <1>; + #size-cells = <0>; + autorepeat; + button@21 { + label = "GPIO Key UP"; + linux,code = <103>; + gpios = <&gpio1 0 1>; + }; + ... diff --git a/Documentation/devicetree/bindings/net/can/fsl-flexcan.txt b/Documentation/devicetree/bindings/net/can/fsl-flexcan.txt new file mode 100755 index 000000000000..1a729f089866 --- /dev/null +++ b/Documentation/devicetree/bindings/net/can/fsl-flexcan.txt @@ -0,0 +1,61 @@ +CAN Device Tree Bindings +------------------------ +2011 Freescale Semiconductor, Inc. + +fsl,flexcan-v1.0 nodes +----------------------- +In addition to the required compatible-, reg- and interrupt-properties, you can +also specify which clock source shall be used for the controller. + +CPI Clock- Can Protocol Interface Clock + This CLK_SRC bit of CTRL(control register) selects the clock source to + the CAN Protocol Interface(CPI) to be either the peripheral clock + (driven by the PLL) or the crystal oscillator clock. The selected clock + is the one fed to the prescaler to generate the Serial Clock (Sclock). + The PRESDIV field of CTRL(control register) controls a prescaler that + generates the Serial Clock (Sclock), whose period defines the + time quantum used to compose the CAN waveform. + +Can Engine Clock Source + There are two sources for CAN clock + - Platform Clock It represents the bus clock + - Oscillator Clock + + Peripheral Clock (PLL) + -------------- + | + --------- ------------- + | |CPI Clock | Prescaler | Sclock + | |---------------->| (1.. 256) |------------> + --------- ------------- + | | + -------------- ---------------------CLK_SRC + Oscillator Clock + +- fsl,flexcan-clock-source : CAN Engine Clock Source.This property selects + the peripheral clock. PLL clock is fed to the + prescaler to generate the Serial Clock (Sclock). + Valid values are "oscillator" and "platform" + "oscillator": CAN engine clock source is oscillator clock. + "platform" The CAN engine clock source is the bus clock + (platform clock). + +- fsl,flexcan-clock-divider : for the reference and system clock, an additional + clock divider can be specified. +- clock-frequency: frequency required to calculate the bitrate for FlexCAN. + +Note: + - v1.0 of flexcan-v1.0 represent the IP block version for P1010 SOC. + - P1010 does not have oscillator as the Clock Source.So the default + Clock Source is platform clock. +Examples: + + can0@1c000 { + compatible = "fsl,flexcan-v1.0"; + reg = <0x1c000 0x1000>; + interrupts = <48 0x2>; + interrupt-parent = <&mpic>; + fsl,flexcan-clock-source = "platform"; + fsl,flexcan-clock-divider = <2>; + clock-frequency = ; + }; diff --git a/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt b/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt index edb7ae19e868..2c6be0377f55 100644 --- a/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt +++ b/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt @@ -74,3 +74,57 @@ Example: interrupt-parent = <&mpic>; phy-handle = <&phy0> }; + +* Gianfar PTP clock nodes + +General Properties: + + - compatible Should be "fsl,etsec-ptp" + - reg Offset and length of the register set for the device + - interrupts There should be at least two interrupts. Some devices + have as many as four PTP related interrupts. + +Clock Properties: + + - fsl,tclk-period Timer reference clock period in nanoseconds. + - fsl,tmr-prsc Prescaler, divides the output clock. + - fsl,tmr-add Frequency compensation value. + - fsl,tmr-fiper1 Fixed interval period pulse generator. + - fsl,tmr-fiper2 Fixed interval period pulse generator. + - fsl,max-adj Maximum frequency adjustment in parts per billion. + + These properties set the operational parameters for the PTP + clock. You must choose these carefully for the clock to work right. + Here is how to figure good values: + + TimerOsc = system clock MHz + tclk_period = desired clock period nanoseconds + NominalFreq = 1000 / tclk_period MHz + FreqDivRatio = TimerOsc / NominalFreq (must be greater that 1.0) + tmr_add = ceil(2^32 / FreqDivRatio) + OutputClock = NominalFreq / tmr_prsc MHz + PulseWidth = 1 / OutputClock microseconds + FiperFreq1 = desired frequency in Hz + FiperDiv1 = 1000000 * OutputClock / FiperFreq1 + tmr_fiper1 = tmr_prsc * tclk_period * FiperDiv1 - tclk_period + max_adj = 1000000000 * (FreqDivRatio - 1.0) - 1 + + The calculation for tmr_fiper2 is the same as for tmr_fiper1. The + driver expects that tmr_fiper1 will be correctly set to produce a 1 + Pulse Per Second (PPS) signal, since this will be offered to the PPS + subsystem to synchronize the Linux clock. + +Example: + + ptp_clock@24E00 { + compatible = "fsl,etsec-ptp"; + reg = <0x24E00 0xB0>; + interrupts = <12 0x8 13 0x8>; + interrupt-parent = < &ipic >; + fsl,tclk-period = <10>; + fsl,tmr-prsc = <100>; + fsl,tmr-add = <0x999999A4>; + fsl,tmr-fiper1 = <0x3B9AC9F6>; + fsl,tmr-fiper2 = <0x00018696>; + fsl,max-adj = <659999998>; + }; diff --git a/Documentation/devicetree/bindings/powerpc/fsl/ifc.txt b/Documentation/devicetree/bindings/powerpc/fsl/ifc.txt new file mode 100644 index 000000000000..939a26d541f6 --- /dev/null +++ b/Documentation/devicetree/bindings/powerpc/fsl/ifc.txt @@ -0,0 +1,76 @@ +Integrated Flash Controller + +Properties: +- name : Should be ifc +- compatible : should contain "fsl,ifc". The version of the integrated + flash controller can be found in the IFC_REV register at + offset zero. + +- #address-cells : Should be either two or three. The first cell is the + chipselect number, and the remaining cells are the + offset into the chipselect. +- #size-cells : Either one or two, depending on how large each chipselect + can be. +- reg : Offset and length of the register set for the device +- interrupts : IFC has two interrupts. The first one is the "common" + interrupt(CM_EVTER_STAT), and second is the NAND interrupt + (NAND_EVTER_STAT). + +- ranges : Each range corresponds to a single chipselect, and covers + the entire access window as configured. + +Child device nodes describe the devices connected to IFC such as NOR (e.g. +cfi-flash) and NAND (fsl,ifc-nand). There might be board specific devices +like FPGAs, CPLDs, etc. + +Example: + + ifc@ffe1e000 { + compatible = "fsl,ifc", "simple-bus"; + #address-cells = <2>; + #size-cells = <1>; + reg = <0x0 0xffe1e000 0 0x2000>; + interrupts = <16 2 19 2>; + + /* NOR, NAND Flashes and CPLD on board */ + ranges = <0x0 0x0 0x0 0xee000000 0x02000000 + 0x1 0x0 0x0 0xffa00000 0x00010000 + 0x3 0x0 0x0 0xffb00000 0x00020000>; + + flash@0,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "cfi-flash"; + reg = <0x0 0x0 0x2000000>; + bank-width = <2>; + device-width = <1>; + + partition@0 { + /* 32MB for user data */ + reg = <0x0 0x02000000>; + label = "NOR Data"; + }; + }; + + flash@1,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,ifc-nand"; + reg = <0x1 0x0 0x10000>; + + partition@0 { + /* This location must not be altered */ + /* 1MB for u-boot Bootloader Image */ + reg = <0x0 0x00100000>; + label = "NAND U-Boot Image"; + read-only; + }; + }; + + cpld@3,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "fsl,p1010rdb-cpld"; + reg = <0x3 0x0 0x000001f>; + }; + }; diff --git a/Documentation/devicetree/bindings/powerpc/fsl/mpic-timer.txt b/Documentation/devicetree/bindings/powerpc/fsl/mpic-timer.txt new file mode 100644 index 000000000000..df41958140e8 --- /dev/null +++ b/Documentation/devicetree/bindings/powerpc/fsl/mpic-timer.txt @@ -0,0 +1,38 @@ +* Freescale MPIC timers + +Required properties: +- compatible: "fsl,mpic-global-timer" + +- reg : Contains two regions. The first is the main timer register bank + (GTCCRxx, GTBCRxx, GTVPRxx, GTDRxx). The second is the timer control + register (TCRx) for the group. + +- fsl,available-ranges: use style section to define which + timer interrupts can be used. This property is optional; without this, + all timers within the group can be used. + +- interrupts: one interrupt per timer in the group, in order, starting + with timer zero. If timer-available-ranges is present, only the + interrupts that correspond to available timers shall be present. + +Example: + /* Note that this requires #interrupt-cells to be 4 */ + timer0: timer@41100 { + compatible = "fsl,mpic-global-timer"; + reg = <0x41100 0x100 0x41300 4>; + + /* Another AMP partition is using timers 0 and 1 */ + fsl,available-ranges = <2 2>; + + interrupts = <2 0 3 0 + 3 0 3 0>; + }; + + timer1: timer@42100 { + compatible = "fsl,mpic-global-timer"; + reg = <0x42100 0x100 0x42300 4>; + interrupts = <4 0 3 0 + 5 0 3 0 + 6 0 3 0 + 7 0 3 0>; + }; diff --git a/Documentation/devicetree/bindings/powerpc/fsl/mpic.txt b/Documentation/devicetree/bindings/powerpc/fsl/mpic.txt index 4f6145859aab..2cf38bd841fd 100644 --- a/Documentation/devicetree/bindings/powerpc/fsl/mpic.txt +++ b/Documentation/devicetree/bindings/powerpc/fsl/mpic.txt @@ -190,7 +190,7 @@ EXAMPLE 4 */ timer0: timer@41100 { compatible = "fsl,mpic-global-timer"; - reg = <0x41100 0x100>; + reg = <0x41100 0x100 0x41300 4>; interrupts = <0 0 3 0 1 0 3 0 2 0 3 0 diff --git a/Documentation/devicetree/bindings/powerpc/nintendo/wii.txt b/Documentation/devicetree/bindings/powerpc/nintendo/wii.txt index a7e155a023b8..36afa322b04b 100644 --- a/Documentation/devicetree/bindings/powerpc/nintendo/wii.txt +++ b/Documentation/devicetree/bindings/powerpc/nintendo/wii.txt @@ -127,7 +127,7 @@ Nintendo Wii device tree - reg : should contain the SDHCI registers location and length - interrupts : should contain the SDHCI interrupt -1.j) The Inter-Processsor Communication (IPC) node +1.j) The Inter-Processor Communication (IPC) node Represent the Inter-Processor Communication interface. This interface enables communications between the Broadway and the Starlet processors. diff --git a/Documentation/devicetree/booting-without-of.txt b/Documentation/devicetree/booting-without-of.txt index 50619a0720a8..7c1329de0596 100644 --- a/Documentation/devicetree/booting-without-of.txt +++ b/Documentation/devicetree/booting-without-of.txt @@ -12,8 +12,9 @@ Table of Contents ================= I - Introduction - 1) Entry point for arch/powerpc - 2) Entry point for arch/x86 + 1) Entry point for arch/arm + 2) Entry point for arch/powerpc + 3) Entry point for arch/x86 II - The DT block format 1) Header @@ -148,7 +149,46 @@ upgrades without significantly impacting the kernel code or cluttering it with special cases. -1) Entry point for arch/powerpc +1) Entry point for arch/arm +--------------------------- + + There is one single entry point to the kernel, at the start + of the kernel image. That entry point supports two calling + conventions. A summary of the interface is described here. A full + description of the boot requirements is documented in + Documentation/arm/Booting + + a) ATAGS interface. Minimal information is passed from firmware + to the kernel with a tagged list of predefined parameters. + + r0 : 0 + + r1 : Machine type number + + r2 : Physical address of tagged list in system RAM + + b) Entry with a flattened device-tree block. Firmware loads the + physical address of the flattened device tree block (dtb) into r2, + r1 is not used, but it is considered good practise to use a valid + machine number as described in Documentation/arm/Booting. + + r0 : 0 + + r1 : Valid machine type number. When using a device tree, + a single machine type number will often be assigned to + represent a class or family of SoCs. + + r2 : physical pointer to the device-tree block + (defined in chapter II) in RAM. Device tree can be located + anywhere in system RAM, but it should be aligned on a 64 bit + boundary. + + The kernel will differentiate between ATAGS and device tree booting by + reading the memory pointed to by r2 and looking for either the flattened + device tree block magic value (0xd00dfeed) or the ATAG_CORE value at + offset 0x4 from r2 (0x54410001). + +2) Entry point for arch/powerpc ------------------------------- There is one single entry point to the kernel, at the start @@ -226,7 +266,7 @@ it with special cases. cannot support both configurations with Book E and configurations with classic Powerpc architectures. -2) Entry point for arch/x86 +3) Entry point for arch/x86 ------------------------------- There is one single 32bit entry point to the kernel at code32_start, diff --git a/Documentation/dmaengine.txt b/Documentation/dmaengine.txt index 0c1c2f63c0a9..5a0cb1ef6164 100644 --- a/Documentation/dmaengine.txt +++ b/Documentation/dmaengine.txt @@ -1 +1,96 @@ -See Documentation/crypto/async-tx-api.txt + DMA Engine API Guide + ==================== + + Vinod Koul + +NOTE: For DMA Engine usage in async_tx please see: + Documentation/crypto/async-tx-api.txt + + +Below is a guide to device driver writers on how to use the Slave-DMA API of the +DMA Engine. This is applicable only for slave DMA usage only. + +The slave DMA usage consists of following steps +1. Allocate a DMA slave channel +2. Set slave and controller specific parameters +3. Get a descriptor for transaction +4. Submit the transaction and wait for callback notification + +1. Allocate a DMA slave channel +Channel allocation is slightly different in the slave DMA context, client +drivers typically need a channel from a particular DMA controller only and even +in some cases a specific channel is desired. To request a channel +dma_request_channel() API is used. + +Interface: +struct dma_chan *dma_request_channel(dma_cap_mask_t mask, + dma_filter_fn filter_fn, + void *filter_param); +where dma_filter_fn is defined as: +typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param); + +When the optional 'filter_fn' parameter is set to NULL dma_request_channel +simply returns the first channel that satisfies the capability mask. Otherwise, +when the mask parameter is insufficient for specifying the necessary channel, +the filter_fn routine can be used to disposition the available channels in the +system. The filter_fn routine is called once for each free channel in the +system. Upon seeing a suitable channel filter_fn returns DMA_ACK which flags +that channel to be the return value from dma_request_channel. A channel +allocated via this interface is exclusive to the caller, until +dma_release_channel() is called. + +2. Set slave and controller specific parameters +Next step is always to pass some specific information to the DMA driver. Most of +the generic information which a slave DMA can use is in struct dma_slave_config. +It allows the clients to specify DMA direction, DMA addresses, bus widths, DMA +burst lengths etc. If some DMA controllers have more parameters to be sent then +they should try to embed struct dma_slave_config in their controller specific +structure. That gives flexibility to client to pass more parameters, if +required. + +Interface: +int dmaengine_slave_config(struct dma_chan *chan, + struct dma_slave_config *config) + +3. Get a descriptor for transaction +For slave usage the various modes of slave transfers supported by the +DMA-engine are: +slave_sg - DMA a list of scatter gather buffers from/to a peripheral +dma_cyclic - Perform a cyclic DMA operation from/to a peripheral till the + operation is explicitly stopped. +The non NULL return of this transfer API represents a "descriptor" for the given +transaction. + +Interface: +struct dma_async_tx_descriptor *(*chan->device->device_prep_dma_sg)( + struct dma_chan *chan, + struct scatterlist *dst_sg, unsigned int dst_nents, + struct scatterlist *src_sg, unsigned int src_nents, + unsigned long flags); +struct dma_async_tx_descriptor *(*chan->device->device_prep_dma_cyclic)( + struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, + size_t period_len, enum dma_data_direction direction); + +4. Submit the transaction and wait for callback notification +To schedule the transaction to be scheduled by dma device, the "descriptor" +returned in above (3) needs to be submitted. +To tell the dma driver that a transaction is ready to be serviced, the +descriptor->submit() callback needs to be invoked. This chains the descriptor to +the pending queue. +The transactions in the pending queue can be activated by calling the +issue_pending API. If channel is idle then the first transaction in queue is +started and subsequent ones queued up. +On completion of the DMA operation the next in queue is submitted and a tasklet +triggered. The tasklet would then call the client driver completion callback +routine for notification, if set. +Interface: +void dma_async_issue_pending(struct dma_chan *chan); + +============================================================================== + +Additional usage notes for dma driver writers +1/ Although DMA engine specifies that completion callback routines cannot submit +any new operations, but typically for slave DMA subsequent transaction may not +be available for submit prior to callback routine being called. This requirement +is not a requirement for DMA-slave devices. But they should take care to drop +the spin-lock they might be holding before calling the callback routine diff --git a/Documentation/dontdiff b/Documentation/dontdiff index 470d3dba1a69..dfa6fc6e4b28 100644 --- a/Documentation/dontdiff +++ b/Documentation/dontdiff @@ -1,6 +1,8 @@ *.a *.aux *.bin +*.bz2 +*.cis *.cpio *.csp *.dsp @@ -8,6 +10,8 @@ *.elf *.eps *.fw +*.gcno +*.gcov *.gen.S *.gif *.grep @@ -19,14 +23,20 @@ *.ko *.log *.lst +*.lzma +*.lzo +*.mo *.moc *.mod.c *.o *.o.* +*.order *.orig *.out +*.patch *.pdf *.png +*.pot *.ps *.rej *.s @@ -39,16 +49,22 @@ *.tex *.ver *.xml +*.xz *_MODULES *_vga16.c *~ +\#*# *.9 -*.9.gz .* +.*.d .mm 53c700_d.h CVS ChangeSet +GPATH +GRTAGS +GSYMS +GTAGS Image Kerntypes Module.markers @@ -57,15 +73,14 @@ PENDING SCCS System.map* TAGS +aconf +af_names.h aic7*reg.h* aic7*reg_print.c* aic7*seq.h* aicasm aicdb.h* -altivec1.c -altivec2.c -altivec4.c -altivec8.c +altivec*.c asm-offsets.h asm_offsets.h autoconf.h* @@ -80,6 +95,7 @@ btfixupprep build bvmlinux bzImage* +capability_names.h capflags.c classlist.h* comp*.log @@ -88,7 +104,8 @@ conf config config-* config_data.h* -config_data.gz* +config.mak +config.mak.autogen conmakehash consolemap_deftbl.c* cpustr.h @@ -96,7 +113,9 @@ crc32table.h* cscope.* defkeymap.c devlist.h* +dnotify_test docproc +dslm elf2ecoff elfconfig.h* evergreen_reg_safe.h @@ -105,6 +124,7 @@ flask.h fore200e_mkfirm fore200e_pca_fw.c* gconf +gconf.glade.h gen-devlist gen_crc32table gen_init_cpio @@ -112,11 +132,12 @@ generated genheaders genksyms *_gray256.c +hpet_example +hugepage-mmap +hugepage-shm ihex2fw ikconfig.h* inat-tables.c -initramfs_data.cpio -initramfs_data.cpio.gz initramfs_list int16.c int1.c @@ -133,15 +154,19 @@ kxgettext lkc_defs.h lex.c lex.*.c +linux logo_*.c logo_*_clut224.c logo_*_mono.c lxdialog +mach mach-types mach-types.h machtypes.h map +map_hugetlb maui_boot.h +media mconf miboot* mk_elfconfig @@ -150,23 +175,29 @@ mkbugboot mkcpustr mkdep mkprep +mkregtable mktables mktree modpost modules.builtin modules.order modversions.h* +nconf ncscope.* offset.h offsets.h oui.c* +page-types parse.c parse.h patches* pca200e.bin pca200e_ecd.bin2 -piggy.gz +perf.data +perf.data.old +perf-archive piggyback +piggy.gzip piggy.S pnmtologo ppc_defs.h* @@ -177,10 +208,9 @@ r200_reg_safe.h r300_reg_safe.h r420_reg_safe.h r600_reg_safe.h -raid6altivec*.c -raid6int*.c -raid6tables.c +recordmcount relocs +rlim_names.h rn50_reg_safe.h rs600_reg_safe.h rv515_reg_safe.h @@ -194,6 +224,7 @@ split-include syscalltab.h tables.c tags +test_get_len tftpboot.img timeconst.h times.h* @@ -210,10 +241,13 @@ vdso32.so.dbg vdso64.lds vdso64.so.dbg version.h* +vmImage vmlinux vmlinux-* vmlinux.aout +vmlinux.bin.all vmlinux.lds +vmlinuz voffset.h vsyscall.lds vsyscall_32.lds diff --git a/Documentation/driver-model/bus.txt b/Documentation/driver-model/bus.txt index 5001b7511626..6754b2df8aa1 100644 --- a/Documentation/driver-model/bus.txt +++ b/Documentation/driver-model/bus.txt @@ -3,24 +3,7 @@ Bus Types Definition ~~~~~~~~~~ - -struct bus_type { - char * name; - - struct subsystem subsys; - struct kset drivers; - struct kset devices; - - struct bus_attribute * bus_attrs; - struct device_attribute * dev_attrs; - struct driver_attribute * drv_attrs; - - int (*match)(struct device * dev, struct device_driver * drv); - int (*hotplug) (struct device *dev, char **envp, - int num_envp, char *buffer, int buffer_size); - int (*suspend)(struct device * dev, pm_message_t state); - int (*resume)(struct device * dev); -}; +See the kerneldoc for the struct bus_type. int bus_register(struct bus_type * bus); diff --git a/Documentation/driver-model/class.txt b/Documentation/driver-model/class.txt index 548505f14aa4..1fefc480a80b 100644 --- a/Documentation/driver-model/class.txt +++ b/Documentation/driver-model/class.txt @@ -27,22 +27,7 @@ The device class structure looks like: typedef int (*devclass_add)(struct device *); typedef void (*devclass_remove)(struct device *); -struct device_class { - char * name; - rwlock_t lock; - u32 devnum; - struct list_head node; - - struct list_head drivers; - struct list_head intf_list; - - struct driver_dir_entry dir; - struct driver_dir_entry device_dir; - struct driver_dir_entry driver_dir; - - devclass_add add_device; - devclass_remove remove_device; -}; +See the kerneldoc for the struct class. A typical device class definition would look like: diff --git a/Documentation/driver-model/device.txt b/Documentation/driver-model/device.txt index a124f3126b0d..b2ff42685bcb 100644 --- a/Documentation/driver-model/device.txt +++ b/Documentation/driver-model/device.txt @@ -2,96 +2,7 @@ The Basic Device Structure ~~~~~~~~~~~~~~~~~~~~~~~~~~ -struct device { - struct list_head g_list; - struct list_head node; - struct list_head bus_list; - struct list_head driver_list; - struct list_head intf_list; - struct list_head children; - struct device * parent; - - char name[DEVICE_NAME_SIZE]; - char bus_id[BUS_ID_SIZE]; - - spinlock_t lock; - atomic_t refcount; - - struct bus_type * bus; - struct driver_dir_entry dir; - - u32 class_num; - - struct device_driver *driver; - void *driver_data; - void *platform_data; - - u32 current_state; - unsigned char *saved_state; - - void (*release)(struct device * dev); -}; - -Fields -~~~~~~ -g_list: Node in the global device list. - -node: Node in device's parent's children list. - -bus_list: Node in device's bus's devices list. - -driver_list: Node in device's driver's devices list. - -intf_list: List of intf_data. There is one structure allocated for - each interface that the device supports. - -children: List of child devices. - -parent: *** FIXME *** - -name: ASCII description of device. - Example: " 3Com Corporation 3c905 100BaseTX [Boomerang]" - -bus_id: ASCII representation of device's bus position. This - field should be a name unique across all devices on the - bus type the device belongs to. - - Example: PCI bus_ids are in the form of - :. - This name is unique across all PCI devices in the system. - -lock: Spinlock for the device. - -refcount: Reference count on the device. - -bus: Pointer to struct bus_type that device belongs to. - -dir: Device's sysfs directory. - -class_num: Class-enumerated value of the device. - -driver: Pointer to struct device_driver that controls the device. - -driver_data: Driver-specific data. - -platform_data: Platform data specific to the device. - - Example: for devices on custom boards, as typical of embedded - and SOC based hardware, Linux often uses platform_data to point - to board-specific structures describing devices and how they - are wired. That can include what ports are available, chip - variants, which GPIO pins act in what additional roles, and so - on. This shrinks the "Board Support Packages" (BSPs) and - minimizes board-specific #ifdefs in drivers. - -current_state: Current power state of the device. - -saved_state: Pointer to saved state of the device. This is usable by - the device driver controlling the device. - -release: Callback to free the device after all references have - gone away. This should be set by the allocator of the - device (i.e. the bus driver that discovered the device). +See the kerneldoc for the struct device. Programming Interface diff --git a/Documentation/driver-model/driver.txt b/Documentation/driver-model/driver.txt index d2cd6fb8ba9e..4421135826a2 100644 --- a/Documentation/driver-model/driver.txt +++ b/Documentation/driver-model/driver.txt @@ -1,23 +1,7 @@ Device Drivers -struct device_driver { - char * name; - struct bus_type * bus; - - struct completion unloaded; - struct kobject kobj; - list_t devices; - - struct module *owner; - - int (*probe) (struct device * dev); - int (*remove) (struct device * dev); - - int (*suspend) (struct device * dev, pm_message_t state); - int (*resume) (struct device * dev); -}; - +See the kerneldoc for the struct device_driver. Allocation diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 492e81df2968..72e238465b0b 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -6,6 +6,42 @@ be removed from this file. --------------------------- +What: x86 floppy disable_hlt +When: 2012 +Why: ancient workaround of dubious utility clutters the + code used by everybody else. +Who: Len Brown + +--------------------------- + +What: CONFIG_APM_CPU_IDLE, and its ability to call APM BIOS in idle +When: 2012 +Why: This optional sub-feature of APM is of dubious reliability, + and ancient APM laptops are likely better served by calling HLT. + Deleting CONFIG_APM_CPU_IDLE allows x86 to stop exporting + the pm_idle function pointer to modules. +Who: Len Brown + +---------------------------- + +What: x86_32 "no-hlt" cmdline param +When: 2012 +Why: remove a branch from idle path, simplify code used by everybody. + This option disabled the use of HLT in idle and machine_halt() + for hardware that was flakey 15-years ago. Today we have + "idle=poll" that removed HLT from idle, and so if such a machine + is still running the upstream kernel, "idle=poll" is likely sufficient. +Who: Len Brown + +---------------------------- + +What: x86 "idle=mwait" cmdline param +When: 2012 +Why: simplify x86 idle code +Who: Len Brown + +---------------------------- + What: PRISM54 When: 2.6.34 @@ -35,17 +71,6 @@ Who: Luis R. Rodriguez --------------------------- -What: AR9170USB -When: 2.6.40 - -Why: This driver is deprecated and the firmware is no longer - maintained. The replacement driver "carl9170" has been - around for a while, so the devices are still supported. - -Who: Christian Lamparter - ---------------------------- - What: IRQF_SAMPLE_RANDOM Check: IRQF_SAMPLE_RANDOM When: July 2009 @@ -226,7 +251,7 @@ Who: Zhang Rui What: CONFIG_ACPI_PROCFS_POWER When: 2.6.39 Why: sysfs I/F for ACPI power devices, including AC and Battery, - has been working in upstream kenrel since 2.6.24, Sep 2007. + has been working in upstream kernel since 2.6.24, Sep 2007. In 2.6.37, we make the sysfs I/F always built in and this option disabled by default. Remove this option and the ACPI power procfs interface in 2.6.39. @@ -273,16 +298,6 @@ Who: Michael Buesch --------------------------- -What: /sys/o2cb symlink -When: January 2010 -Why: /sys/fs/o2cb is the proper location for this information - /sys/o2cb - exists as a symlink for backwards compatibility for old versions of - ocfs2-tools. 2 years should be sufficient time to phase in new versions - which know to look in /sys/fs/o2cb. -Who: ocfs2-devel@oss.oracle.com - ---------------------------- - What: Ability for non root users to shm_get hugetlb pages based on mlock resource limits When: 2.6.31 @@ -405,16 +420,6 @@ Who: anybody or Florian Mickler ---------------------------- -What: capifs -When: February 2011 -Files: drivers/isdn/capi/capifs.* -Why: udev fully replaces this special file system that only contains CAPI - NCCI TTY device nodes. User space (pppdcapiplugin) works without - noticing the difference. -Who: Jan Kiszka - ----------------------------- - What: KVM paravirt mmu host support When: January 2011 Why: The paravirt mmu host support is slower than non-paravirt mmu, both @@ -460,14 +465,6 @@ Who: Thomas Gleixner ---------------------------- -What: The acpi_sleep=s4_nonvs command line option -When: 2.6.37 -Files: arch/x86/kernel/acpi/sleep.c -Why: superseded by acpi_sleep=nonvs -Who: Rafael J. Wysocki - ----------------------------- - What: PCI DMA unmap state API When: August 2012 Why: PCI DMA unmap state API (include/linux/pci-dma.h) was replaced @@ -484,23 +481,6 @@ Who: FUJITA Tomonori ---------------------------- -What: namespace cgroup (ns_cgroup) -When: 2.6.38 -Why: The ns_cgroup leads to some problems: - * cgroup creation is out-of-control - * cgroup name can conflict when pids are looping - * it is not possible to have a single process handling - a lot of namespaces without falling in a exponential creation time - * we may want to create a namespace without creating a cgroup - - The ns_cgroup is replaced by a compatibility flag 'clone_children', - where a newly created cgroup will copy the parent cgroup values. - The userspace has to manually create a cgroup and add a task to - the 'tasks' file. -Who: Daniel Lezcano - ----------------------------- - What: iwlwifi disable_hw_scan module parameters When: 2.6.40 Why: Hareware scan is the prefer method for iwlwifi devices for @@ -580,3 +560,26 @@ Why: These legacy callbacks should no longer be used as i2c-core offers Who: Jean Delvare ---------------------------- + +What: Support for UVCIOC_CTRL_ADD in the uvcvideo driver +When: 2.6.42 +Why: The information passed to the driver by this ioctl is now queried + dynamically from the device. +Who: Laurent Pinchart + +---------------------------- + +What: Support for UVCIOC_CTRL_MAP_OLD in the uvcvideo driver +When: 2.6.42 +Why: Used only by applications compiled against older driver versions. + Superseded by UVCIOC_CTRL_MAP which supports V4L2 menu controls. +Who: Laurent Pinchart + +---------------------------- + +What: Support for UVCIOC_CTRL_GET and UVCIOC_CTRL_SET in the uvcvideo driver +When: 2.6.42 +Why: Superseded by the UVCIOC_CTRL_QUERY ioctl. +Who: Laurent Pinchart + +---------------------------- diff --git a/Documentation/filesystems/9p.txt b/Documentation/filesystems/9p.txt index b22abba78fed..13de64c7f0ab 100644 --- a/Documentation/filesystems/9p.txt +++ b/Documentation/filesystems/9p.txt @@ -25,6 +25,8 @@ Other applications are described in the following papers: http://xcpu.org/papers/cellfs-talk.pdf * PROSE I/O: Using 9p to enable Application Partitions http://plan9.escet.urjc.es/iwp9/cready/PROSE_iwp9_2006.pdf + * VirtFS: A Virtualization Aware File System pass-through + http://goo.gl/3WPDg USAGE ===== @@ -130,31 +132,20 @@ OPTIONS RESOURCES ========= -Our current recommendation is to use Inferno (http://www.vitanuova.com/nferno/index.html) -as the 9p server. You can start a 9p server under Inferno by issuing the -following command: - ; styxlisten -A tcp!*!564 export '#U*' +Protocol specifications are maintained on github: +http://ericvh.github.com/9p-rfc/ -The -A specifies an unauthenticated export. The 564 is the port # (you may -have to choose a higher port number if running as a normal user). The '#U*' -specifies exporting the root of the Linux name space. You may specify a -subset of the namespace by extending the path: '#U*'/tmp would just export -/tmp. For more information, see the Inferno manual pages covering styxlisten -and export. +9p client and server implementations are listed on +http://9p.cat-v.org/implementations -A Linux version of the 9p server is now maintained under the npfs project -on sourceforge (http://sourceforge.net/projects/npfs). The currently -maintained version is the single-threaded version of the server (named spfs) -available from the same SVN repository. +A 9p2000.L server is being developed by LLNL and can be found +at http://code.google.com/p/diod/ There are user and developer mailing lists available through the v9fs project on sourceforge (http://sourceforge.net/projects/v9fs). -A stand-alone version of the module (which should build for any 2.6 kernel) -is available via (http://github.com/ericvh/9p-sac/tree/master) - -News and other information is maintained on SWiK (http://swik.net/v9fs) -and the Wiki (http://sf.net/apps/mediawiki/v9fs/index.php). +News and other information is maintained on a Wiki. +(http://sf.net/apps/mediawiki/v9fs/index.php). Bug reports may be issued through the kernel.org bugzilla (http://bugzilla.kernel.org) diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 61b31acb9176..57d827d6071d 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -104,7 +104,7 @@ of the locking scheme for directory operations. prototypes: struct inode *(*alloc_inode)(struct super_block *sb); void (*destroy_inode)(struct inode *); - void (*dirty_inode) (struct inode *); + void (*dirty_inode) (struct inode *, int flags); int (*write_inode) (struct inode *, struct writeback_control *wbc); int (*drop_inode) (struct inode *); void (*evict_inode) (struct inode *); @@ -126,7 +126,7 @@ locking rules: s_umount alloc_inode: destroy_inode: -dirty_inode: (must not sleep) +dirty_inode: write_inode: drop_inode: !!!inode->i_lock!!! evict_inode: diff --git a/Documentation/filesystems/configfs/configfs_example_explicit.c b/Documentation/filesystems/configfs/configfs_example_explicit.c index fd53869f5633..1420233dfa55 100644 --- a/Documentation/filesystems/configfs/configfs_example_explicit.c +++ b/Documentation/filesystems/configfs/configfs_example_explicit.c @@ -464,9 +464,8 @@ static int __init configfs_example_init(void) return 0; out_unregister: - for (; i >= 0; i--) { + for (i--; i >= 0; i--) configfs_unregister_subsystem(example_subsys[i]); - } return ret; } @@ -475,9 +474,8 @@ static void __exit configfs_example_exit(void) { int i; - for (i = 0; example_subsys[i]; i++) { + for (i = 0; example_subsys[i]; i++) configfs_unregister_subsystem(example_subsys[i]); - } } module_init(configfs_example_init); diff --git a/Documentation/filesystems/configfs/configfs_example_macros.c b/Documentation/filesystems/configfs/configfs_example_macros.c index d8e30a0378aa..327dfbc640a9 100644 --- a/Documentation/filesystems/configfs/configfs_example_macros.c +++ b/Documentation/filesystems/configfs/configfs_example_macros.c @@ -427,9 +427,8 @@ static int __init configfs_example_init(void) return 0; out_unregister: - for (; i >= 0; i--) { + for (i--; i >= 0; i--) configfs_unregister_subsystem(example_subsys[i]); - } return ret; } @@ -438,9 +437,8 @@ static void __exit configfs_example_exit(void) { int i; - for (i = 0; example_subsys[i]; i++) { + for (i = 0; example_subsys[i]; i++) configfs_unregister_subsystem(example_subsys[i]); - } } module_init(configfs_example_init); diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index c79ec58fd7f6..3ae9bc94352a 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt @@ -226,10 +226,6 @@ acl Enables POSIX Access Control Lists support. noacl This option disables POSIX Access Control List support. -reservation - -noreservation - bsddf (*) Make 'df' act like BSD. minixdf Make 'df' act like Minix. diff --git a/Documentation/filesystems/nfs/idmapper.txt b/Documentation/filesystems/nfs/idmapper.txt index b9b4192ea8b5..9c8fd6148656 100644 --- a/Documentation/filesystems/nfs/idmapper.txt +++ b/Documentation/filesystems/nfs/idmapper.txt @@ -47,8 +47,8 @@ request-key will find the first matching line and corresponding program. In this case, /some/other/program will handle all uid lookups and /usr/sbin/nfs.idmap will handle gid, user, and group lookups. -See for more information about the -request-key function. +See for more information +about the request-key function. ========= diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.txt index 9ed920a8cd79..7618a287aa41 100644 --- a/Documentation/filesystems/ocfs2.txt +++ b/Documentation/filesystems/ocfs2.txt @@ -46,9 +46,15 @@ errors=panic Panic and halt the machine if an error occurs. intr (*) Allow signals to interrupt cluster operations. nointr Do not allow signals to interrupt cluster operations. +noatime Do not update access time. +relatime(*) Update atime if the previous atime is older than + mtime or ctime +strictatime Always update atime, but the minimum update interval + is specified by atime_quantum. atime_quantum=60(*) OCFS2 will not update atime unless this number of seconds has passed since the last update. - Set to zero to always update atime. + Set to zero to always update atime. This option need + work with strictatime. data=ordered (*) All data are forced directly out to the main file system prior to its metadata being committed to the journal. diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index b0b814d75ca1..db3b1aba32a3 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -574,6 +574,12 @@ The contents of each smp_affinity file is the same by default: > cat /proc/irq/0/smp_affinity ffffffff +There is an alternate interface, smp_affinity_list which allows specifying +a cpu range instead of a bitmask: + + > cat /proc/irq/0/smp_affinity_list + 1024-1031 + The default_smp_affinity mask applies to all non-active IRQs, which are the IRQs which have not yet been allocated/activated, and hence which lack a /proc/irq/[0-9]* directory. @@ -583,12 +589,13 @@ reports itself as being attached. This hardware locality information does not include information about any possible driver locality preference. prof_cpu_mask specifies which CPUs are to be profiled by the system wide -profiler. Default value is ffffffff (all cpus). +profiler. Default value is ffffffff (all cpus if there are only 32 of them). The way IRQs are routed is handled by the IO-APIC, and it's Round Robin between all the CPUs which are allowed to handle it. As usual the kernel has more info than you and does a better job than you, so the defaults are the -best choice for almost everyone. +best choice for almost everyone. [Note this applies only to those IO-APIC's +that support "Round Robin" interrupt distribution.] There are three more important subdirectories in /proc: net, scsi, and sys. The general rule is that the contents, or even the existence of these diff --git a/Documentation/filesystems/ubifs.txt b/Documentation/filesystems/ubifs.txt index d7b13b01e980..8e4fab639d9c 100644 --- a/Documentation/filesystems/ubifs.txt +++ b/Documentation/filesystems/ubifs.txt @@ -115,28 +115,8 @@ ubi.mtd=0 root=ubi0:rootfs rootfstype=ubifs Module Parameters for Debugging =============================== -When UBIFS has been compiled with debugging enabled, there are 3 module +When UBIFS has been compiled with debugging enabled, there are 2 module parameters that are available to control aspects of testing and debugging. -The parameters are unsigned integers where each bit controls an option. -The parameters are: - -debug_msgs Selects which debug messages to display, as follows: - - Message Type Flag value - - General messages 1 - Journal messages 2 - Mount messages 4 - Commit messages 8 - LEB search messages 16 - Budgeting messages 32 - Garbage collection messages 64 - Tree Node Cache (TNC) messages 128 - LEB properties (lprops) messages 256 - Input/output messages 512 - Log messages 1024 - Scan messages 2048 - Recovery messages 4096 debug_chks Selects extra checks that UBIFS can do while running: @@ -154,11 +134,9 @@ debug_tsts Selects a mode of testing, as follows: Test mode Flag value - Force in-the-gaps method 2 Failure mode for recovery testing 4 -For example, set debug_msgs to 5 to display General messages and Mount -messages. +For example, set debug_chks to 3 to enable general and TNC checks. References diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 21a7dc467bba..88b9f5519af9 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -211,7 +211,7 @@ struct super_operations { struct inode *(*alloc_inode)(struct super_block *sb); void (*destroy_inode)(struct inode *); - void (*dirty_inode) (struct inode *); + void (*dirty_inode) (struct inode *, int flags); int (*write_inode) (struct inode *, int); void (*drop_inode) (struct inode *); void (*delete_inode) (struct inode *); diff --git a/Documentation/filesystems/xfs.txt b/Documentation/filesystems/xfs.txt index 7bff3e4f35df..3fc0c31a6f5d 100644 --- a/Documentation/filesystems/xfs.txt +++ b/Documentation/filesystems/xfs.txt @@ -39,6 +39,12 @@ When mounting an XFS filesystem, the following options are accepted. drive level write caching to be enabled, for devices that support write barriers. + discard + Issue command to let the block device reclaim space freed by the + filesystem. This is useful for SSD devices, thinly provisioned + LUNs and virtual machine images, but may have a performance + impact. This option is incompatible with the nodelaylog option. + dmapi Enable the DMAPI (Data Management API) event callouts. Use with the "mtpt" option. diff --git a/Documentation/flexible-arrays.txt b/Documentation/flexible-arrays.txt index cb8a3a00cc92..df904aec9904 100644 --- a/Documentation/flexible-arrays.txt +++ b/Documentation/flexible-arrays.txt @@ -66,10 +66,10 @@ trick is to ensure that any needed memory allocations are done before entering atomic context, using: int flex_array_prealloc(struct flex_array *array, unsigned int start, - unsigned int end, gfp_t flags); + unsigned int nr_elements, gfp_t flags); This function will ensure that memory for the elements indexed in the range -defined by start and end has been allocated. Thereafter, a +defined by start and nr_elements has been allocated. Thereafter, a flex_array_put() call on an element in that range is guaranteed not to block. diff --git a/Documentation/usb/hiddev.txt b/Documentation/hid/hiddev.txt similarity index 100% rename from Documentation/usb/hiddev.txt rename to Documentation/hid/hiddev.txt diff --git a/Documentation/hid/hidraw.txt b/Documentation/hid/hidraw.txt new file mode 100644 index 000000000000..029e6cb9a7e8 --- /dev/null +++ b/Documentation/hid/hidraw.txt @@ -0,0 +1,119 @@ + HIDRAW - Raw Access to USB and Bluetooth Human Interface Devices + ================================================================== + +The hidraw driver provides a raw interface to USB and Bluetooth Human +Interface Devices (HIDs). It differs from hiddev in that reports sent and +received are not parsed by the HID parser, but are sent to and received from +the device unmodified. + +Hidraw should be used if the userspace application knows exactly how to +communicate with the hardware device, and is able to construct the HID +reports manually. This is often the case when making userspace drivers for +custom HID devices. + +Hidraw is also useful for communicating with non-conformant HID devices +which send and receive data in a way that is inconsistent with their report +descriptors. Because hiddev parses reports which are sent and received +through it, checking them against the device's report descriptor, such +communication with these non-conformant devices is impossible using hiddev. +Hidraw is the only alternative, short of writing a custom kernel driver, for +these non-conformant devices. + +A benefit of hidraw is that its use by userspace applications is independent +of the underlying hardware type. Currently, Hidraw is implemented for USB +and Bluetooth. In the future, as new hardware bus types are developed which +use the HID specification, hidraw will be expanded to add support for these +new bus types. + +Hidraw uses a dynamic major number, meaning that udev should be relied on to +create hidraw device nodes. Udev will typically create the device nodes +directly under /dev (eg: /dev/hidraw0). As this location is distribution- +and udev rule-dependent, applications should use libudev to locate hidraw +devices attached to the system. There is a tutorial on libudev with a +working example at: + http://www.signal11.us/oss/udev/ + +The HIDRAW API +--------------- + +read() +------- +read() will read a queued report received from the HID device. On USB +devices, the reports read using read() are the reports sent from the device +on the INTERRUPT IN endpoint. By default, read() will block until there is +a report available to be read. read() can be made non-blocking, by passing +the O_NONBLOCK flag to open(), or by setting the O_NONBLOCK flag using +fcntl(). + +On a device which uses numbered reports, the first byte of the returned data +will be the report number; the report data follows, beginning in the second +byte. For devices which do not use numbered reports, the report data +will begin at the first byte. + +write() +-------- +The write() function will write a report to the device. For USB devices, if +the device has an INTERRUPT OUT endpoint, the report will be sent on that +endpoint. If it does not, the report will be sent over the control endpoint, +using a SET_REPORT transfer. + +The first byte of the buffer passed to write() should be set to the report +number. If the device does not use numbered reports, the first byte should +be set to 0. The report data itself should begin at the second byte. + +ioctl() +-------- +Hidraw supports the following ioctls: + +HIDIOCGRDESCSIZE: Get Report Descriptor Size +This ioctl will get the size of the device's report descriptor. + +HIDIOCGRDESC: Get Report Descriptor +This ioctl returns the device's report descriptor using a +hidraw_report_descriptor struct. Make sure to set the size field of the +hidraw_report_descriptor struct to the size returned from HIDIOCGRDESCSIZE. + +HIDIOCGRAWINFO: Get Raw Info +This ioctl will return a hidraw_devinfo struct containing the bus type, the +vendor ID (VID), and product ID (PID) of the device. The bus type can be one +of: + BUS_USB + BUS_HIL + BUS_BLUETOOTH + BUS_VIRTUAL +which are defined in linux/input.h. + +HIDIOCGRAWNAME(len): Get Raw Name +This ioctl returns a string containing the vendor and product strings of +the device. The returned string is Unicode, UTF-8 encoded. + +HIDIOCGRAWPHYS(len): Get Physical Address +This ioctl returns a string representing the physical address of the device. +For USB devices, the string contains the physical path to the device (the +USB controller, hubs, ports, etc). For Bluetooth devices, the string +contains the hardware (MAC) address of the device. + +HIDIOCSFEATURE(len): Send a Feature Report +This ioctl will send a feature report to the device. Per the HID +specification, feature reports are always sent using the control endpoint. +Set the first byte of the supplied buffer to the report number. For devices +which do not use numbered reports, set the first byte to 0. The report data +begins in the second byte. Make sure to set len accordingly, to one more +than the length of the report (to account for the report number). + +HIDIOCGFEATURE(len): Get a Feature Report +This ioctl will request a feature report from the device using the control +endpoint. The first byte of the supplied buffer should be set to the report +number of the requested report. For devices which do not use numbered +reports, set the first byte to 0. The report will be returned starting at +the first byte of the buffer (ie: the report number is not returned). + +Example +--------- +In samples/, find hid-example.c, which shows examples of read(), write(), +and all the ioctls for hidraw. The code may be used by anyone for any +purpose, and can serve as a starting point for developing applications using +hidraw. + +Document by: + Alan Ott , Signal 11 Software diff --git a/Documentation/hwmon/adm1021 b/Documentation/hwmon/adm1021 index 03d02bfb3df1..02ad96cf9b2b 100644 --- a/Documentation/hwmon/adm1021 +++ b/Documentation/hwmon/adm1021 @@ -14,10 +14,6 @@ Supported chips: Prefix: 'gl523sm' Addresses scanned: I2C 0x18 - 0x1a, 0x29 - 0x2b, 0x4c - 0x4e Datasheet: - * Intel Xeon Processor - Prefix: - any other - may require 'force_adm1021' parameter - Addresses scanned: none - Datasheet: Publicly available at Intel website * Maxim MAX1617 Prefix: 'max1617' Addresses scanned: I2C 0x18 - 0x1a, 0x29 - 0x2b, 0x4c - 0x4e @@ -91,21 +87,27 @@ will do no harm, but will return 'old' values. It is possible to make ADM1021-clones do faster measurements, but there is really no good reason for that. -Xeon support ------------- -Some Xeon processors have real max1617, adm1021, or compatible chips -within them, with two temperature sensors. +Netburst-based Xeon support +--------------------------- -Other Xeons have chips with only one sensor. +Some Xeon processors based on the Netburst (early Pentium 4, from 2001 to +2003) microarchitecture had real MAX1617, ADM1021, or compatible chips +within them, with two temperature sensors. Other Xeon processors of this +era (with 400 MHz FSB) had chips with only one temperature sensor. -If you have a Xeon, and the adm1021 module loads, and both temperatures -appear valid, then things are good. +If you have such an old Xeon, and you get two valid temperatures when +loading the adm1021 module, then things are good. -If the adm1021 module doesn't load, you should try this: - modprobe adm1021 force_adm1021=BUS,ADDRESS - ADDRESS can only be 0x18, 0x1a, 0x29, 0x2b, 0x4c, or 0x4e. +If nothing happens when loading the adm1021 module, and you are certain +that your specific Xeon processor model includes compatible sensors, you +will have to explicitly instantiate the sensor chips from user-space. See +method 4 in Documentation/i2c/instantiating-devices. Possible slave +addresses are 0x18, 0x1a, 0x29, 0x2b, 0x4c, or 0x4e. It is likely that +only temp2 will be correct and temp1 will have to be ignored. -If you have dual Xeons you may have appear to have two separate -adm1021-compatible chips, or two single-temperature sensors, at distinct -addresses. +Previous generations of the Xeon processor (based on Pentium II/III) +didn't have these sensors. Next generations of Xeon processors (533 MHz +FSB and faster) lost them, until the Core-based generation which +introduced integrated digital thermal sensors. These are supported by +the coretemp driver. diff --git a/Documentation/hwmon/adm1275 b/Documentation/hwmon/adm1275 new file mode 100644 index 000000000000..6a3a6476cf20 --- /dev/null +++ b/Documentation/hwmon/adm1275 @@ -0,0 +1,60 @@ +Kernel driver adm1275 +===================== + +Supported chips: + * Analog Devices ADM1275 + Prefix: 'adm1275' + Addresses scanned: - + Datasheet: www.analog.com/static/imported-files/data_sheets/ADM1275.pdf + +Author: Guenter Roeck + + +Description +----------- + +This driver supports hardware montoring for Analog Devices ADM1275 Hot-Swap +Controller and Digital Power Monitor. + +The ADM1275 is a hot-swap controller that allows a circuit board to be removed +from or inserted into a live backplane. It also features current and voltage +readback via an integrated 12-bit analog-to-digital converter (ADC), accessed +using a PMBus. interface. + +The driver is a client driver to the core PMBus driver. Please see +Documentation/hwmon/pmbus for details on PMBus client drivers. + + +Usage Notes +----------- + +This driver does not auto-detect devices. You will have to instantiate the +devices explicitly. Please see Documentation/i2c/instantiating-devices for +details. + + +Platform data support +--------------------- + +The driver supports standard PMBus driver platform data. Please see +Documentation/hwmon/pmbus for details. + + +Sysfs entries +------------- + +The following attributes are supported. Limits are read-write; all other +attributes are read-only. + +in1_label "vin1" or "vout1" depending on chip variant and + configuration. +in1_input Measured voltage. From READ_VOUT register. +in1_min Minumum Voltage. From VOUT_UV_WARN_LIMIT register. +in1_max Maximum voltage. From VOUT_OV_WARN_LIMIT register. +in1_min_alarm Voltage low alarm. From VOLTAGE_UV_WARNING status. +in1_max_alarm Voltage high alarm. From VOLTAGE_OV_WARNING status. + +curr1_label "iout1" +curr1_input Measured current. From READ_IOUT register. +curr1_max Maximum current. From IOUT_OC_WARN_LIMIT register. +curr1_max_alarm Current high alarm. From IOUT_OC_WARN_LIMIT register. diff --git a/Documentation/hwmon/coretemp b/Documentation/hwmon/coretemp index 25568f844804..f85e913a3401 100644 --- a/Documentation/hwmon/coretemp +++ b/Documentation/hwmon/coretemp @@ -15,8 +15,13 @@ Author: Rudolf Marek Description ----------- +This driver permits reading the DTS (Digital Temperature Sensor) embedded +inside Intel CPUs. This driver can read both the per-core and per-package +temperature using the appropriate sensors. The per-package sensor is new; +as of now, it is present only in the SandyBridge platform. The driver will +show the temperature of all cores inside a package under a single device +directory inside hwmon. -This driver permits reading temperature sensor embedded inside Intel Core CPU. Temperature is measured in degrees Celsius and measurement resolution is 1 degree C. Valid temperatures are from 0 to TjMax degrees C, because the actual value of temperature register is in fact a delta from TjMax. @@ -27,13 +32,15 @@ mechanism will perform actions to forcibly cool down the processor. Alarm may be raised, if the temperature grows enough (more than TjMax) to trigger the Out-Of-Spec bit. Following table summarizes the exported sysfs files: -temp1_input - Core temperature (in millidegrees Celsius). -temp1_max - All cooling devices should be turned on (on Core2). -temp1_crit - Maximum junction temperature (in millidegrees Celsius). -temp1_crit_alarm - Set when Out-of-spec bit is set, never clears. +All Sysfs entries are named with their core_id (represented here by 'X'). +tempX_input - Core temperature (in millidegrees Celsius). +tempX_max - All cooling devices should be turned on (on Core2). +tempX_crit - Maximum junction temperature (in millidegrees Celsius). +tempX_crit_alarm - Set when Out-of-spec bit is set, never clears. Correct CPU operation is no longer guaranteed. -temp1_label - Contains string "Core X", where X is processor - number. +tempX_label - Contains string "Core X", where X is processor + number. For Package temp, this will be "Physical id Y", + where Y is the package number. The TjMax temperature is set to 85 degrees C if undocumented model specific register (UMSR) 0xee has bit 30 set. If not the TjMax is 100 degrees C as diff --git a/Documentation/hwmon/emc6w201 b/Documentation/hwmon/emc6w201 new file mode 100644 index 000000000000..32f355aaf56b --- /dev/null +++ b/Documentation/hwmon/emc6w201 @@ -0,0 +1,42 @@ +Kernel driver emc6w201 +====================== + +Supported chips: + * SMSC EMC6W201 + Prefix: 'emc6w201' + Addresses scanned: I2C 0x2c, 0x2d, 0x2e + Datasheet: Not public + +Author: Jean Delvare + + +Description +----------- + +From the datasheet: + +"The EMC6W201 is an environmental monitoring device with automatic fan +control capability and enhanced system acoustics for noise suppression. +This ACPI compliant device provides hardware monitoring for up to six +voltages (including its own VCC) and five external thermal sensors, +measures the speed of up to five fans, and controls the speed of +multiple DC fans using three Pulse Width Modulator (PWM) outputs. Note +that it is possible to control more than three fans by connecting two +fans to one PWM output. The EMC6W201 will be available in a 36-pin +QFN package." + +The device is functionally close to the EMC6D100 series, but is +register-incompatible. + +The driver currently only supports the monitoring of the voltages, +temperatures and fan speeds. Limits can be changed. Alarms are not +supported, and neither is fan speed control. + + +Known Systems With EMC6W201 +--------------------------- + +The EMC6W201 is a rare device, only found on a few systems, made in +2005 and 2006. Known systems with this device: +* Dell Precision 670 workstation +* Gigabyte 2CEWH mainboard diff --git a/Documentation/hwmon/f71882fg b/Documentation/hwmon/f71882fg index df02245d1419..84d2623810f3 100644 --- a/Documentation/hwmon/f71882fg +++ b/Documentation/hwmon/f71882fg @@ -6,6 +6,10 @@ Supported chips: Prefix: 'f71808e' Addresses scanned: none, address read from Super I/O config space Datasheet: Not public + * Fintek F71808A + Prefix: 'f71808a' + Addresses scanned: none, address read from Super I/O config space + Datasheet: Not public * Fintek F71858FG Prefix: 'f71858fg' Addresses scanned: none, address read from Super I/O config space diff --git a/Documentation/hwmon/fam15h_power b/Documentation/hwmon/fam15h_power new file mode 100644 index 000000000000..a92918e0bd69 --- /dev/null +++ b/Documentation/hwmon/fam15h_power @@ -0,0 +1,37 @@ +Kernel driver fam15h_power +========================== + +Supported chips: +* AMD Family 15h Processors + + Prefix: 'fam15h_power' + Addresses scanned: PCI space + Datasheets: + BIOS and Kernel Developer's Guide (BKDG) For AMD Family 15h Processors + (not yet published) + +Author: Andreas Herrmann + +Description +----------- + +This driver permits reading of registers providing power information +of AMD Family 15h processors. + +For AMD Family 15h processors the following power values can be +calculated using different processor northbridge function registers: + +* BasePwrWatts: Specifies in watts the maximum amount of power + consumed by the processor for NB and logic external to the core. +* ProcessorPwrWatts: Specifies in watts the maximum amount of power + the processor can support. +* CurrPwrWatts: Specifies in watts the current amount of power being + consumed by the processor. + +This driver provides ProcessorPwrWatts and CurrPwrWatts: +* power1_crit (ProcessorPwrWatts) +* power1_input (CurrPwrWatts) + +On multi-node processors the calculated value is for the entire +package and not for a single node. Thus the driver creates sysfs +attributes only for internal node0 of a multi-node processor. diff --git a/Documentation/hwmon/k10temp b/Documentation/hwmon/k10temp index d2b56a4fd1f5..0393c89277c0 100644 --- a/Documentation/hwmon/k10temp +++ b/Documentation/hwmon/k10temp @@ -11,6 +11,7 @@ Supported chips: Socket S1G2: Athlon (X2), Sempron (X2), Turion X2 (Ultra) * AMD Family 12h processors: "Llano" * AMD Family 14h processors: "Brazos" (C/E/G-Series) +* AMD Family 15h processors: "Bulldozer" Prefix: 'k10temp' Addresses scanned: PCI space @@ -40,7 +41,7 @@ Description ----------- This driver permits reading of the internal temperature sensor of AMD -Family 10h/11h/12h/14h processors. +Family 10h/11h/12h/14h/15h processors. All these processors have a sensor, but on those for Socket F or AM2+, the sensor may return inconsistent values (erratum 319). The driver diff --git a/Documentation/hwmon/lm90 b/Documentation/hwmon/lm90 index fa475c0a48a3..f3efd18e87f4 100644 --- a/Documentation/hwmon/lm90 +++ b/Documentation/hwmon/lm90 @@ -32,6 +32,16 @@ Supported chips: Addresses scanned: I2C 0x4c and 0x4d Datasheet: Publicly available at the ON Semiconductor website http://www.onsemi.com/PowerSolutions/product.do?id=ADT7461 + * Analog Devices ADT7461A + Prefix: 'adt7461a' + Addresses scanned: I2C 0x4c and 0x4d + Datasheet: Publicly available at the ON Semiconductor website + http://www.onsemi.com/PowerSolutions/product.do?id=ADT7461A + * ON Semiconductor NCT1008 + Prefix: 'nct1008' + Addresses scanned: I2C 0x4c and 0x4d + Datasheet: Publicly available at the ON Semiconductor website + http://www.onsemi.com/PowerSolutions/product.do?id=NCT1008 * Maxim MAX6646 Prefix: 'max6646' Addresses scanned: I2C 0x4d @@ -149,7 +159,7 @@ ADM1032: * ALERT is triggered by open remote sensor. * SMBus PEC support for Write Byte and Receive Byte transactions. -ADT7461: +ADT7461, ADT7461A, NCT1008: * Extended temperature range (breaks compatibility) * Lower resolution for remote temperature @@ -195,9 +205,9 @@ are exported, one for each channel, but these values are of course linked. Only the local hysteresis can be set from user-space, and the same delta applies to the remote hysteresis. -The lm90 driver will not update its values more frequently than every -other second; reading them more often will do no harm, but will return -'old' values. +The lm90 driver will not update its values more frequently than configured with +the update_interval attribute; reading them more often will do no harm, but will +return 'old' values. SMBus Alert Support ------------------- @@ -205,11 +215,12 @@ SMBus Alert Support This driver has basic support for SMBus alert. When an alert is received, the status register is read and the faulty temperature channel is logged. -The Analog Devices chips (ADM1032 and ADT7461) do not implement the SMBus -alert protocol properly so additional care is needed: the ALERT output is -disabled when an alert is received, and is re-enabled only when the alarm -is gone. Otherwise the chip would block alerts from other chips in the bus -as long as the alarm is active. +The Analog Devices chips (ADM1032, ADT7461 and ADT7461A) and ON +Semiconductor chips (NCT1008) do not implement the SMBus alert protocol +properly so additional care is needed: the ALERT output is disabled when +an alert is received, and is re-enabled only when the alarm is gone. +Otherwise the chip would block alerts from other chips in the bus as long +as the alarm is active. PEC Support ----------- diff --git a/Documentation/hwmon/max16064 b/Documentation/hwmon/max16064 new file mode 100644 index 000000000000..41728999e142 --- /dev/null +++ b/Documentation/hwmon/max16064 @@ -0,0 +1,62 @@ +Kernel driver max16064 +====================== + +Supported chips: + * Maxim MAX16064 + Prefix: 'max16064' + Addresses scanned: - + Datasheet: http://datasheets.maxim-ic.com/en/ds/MAX16064.pdf + +Author: Guenter Roeck + + +Description +----------- + +This driver supports hardware montoring for Maxim MAX16064 Quad Power-Supply +Controller with Active-Voltage Output Control and PMBus Interface. + +The driver is a client driver to the core PMBus driver. +Please see Documentation/hwmon/pmbus for details on PMBus client drivers. + + +Usage Notes +----------- + +This driver does not auto-detect devices. You will have to instantiate the +devices explicitly. Please see Documentation/i2c/instantiating-devices for +details. + + +Platform data support +--------------------- + +The driver supports standard PMBus driver platform data. + + +Sysfs entries +------------- + +The following attributes are supported. Limits are read-write; all other +attributes are read-only. + +in[1-4]_label "vout[1-4]" +in[1-4]_input Measured voltage. From READ_VOUT register. +in[1-4]_min Minumum Voltage. From VOUT_UV_WARN_LIMIT register. +in[1-4]_max Maximum voltage. From VOUT_OV_WARN_LIMIT register. +in[1-4]_lcrit Critical minumum Voltage. VOUT_UV_FAULT_LIMIT register. +in[1-4]_crit Critical maximum voltage. From VOUT_OV_FAULT_LIMIT register. +in[1-4]_min_alarm Voltage low alarm. From VOLTAGE_UV_WARNING status. +in[1-4]_max_alarm Voltage high alarm. From VOLTAGE_OV_WARNING status. +in[1-4]_lcrit_alarm Voltage critical low alarm. From VOLTAGE_UV_FAULT status. +in[1-4]_crit_alarm Voltage critical high alarm. From VOLTAGE_OV_FAULT status. + +temp1_input Measured temperature. From READ_TEMPERATURE_1 register. +temp1_max Maximum temperature. From OT_WARN_LIMIT register. +temp1_crit Critical high temperature. From OT_FAULT_LIMIT register. +temp1_max_alarm Chip temperature high alarm. Set by comparing + READ_TEMPERATURE_1 with OT_WARN_LIMIT if TEMP_OT_WARNING + status is set. +temp1_crit_alarm Chip temperature critical high alarm. Set by comparing + READ_TEMPERATURE_1 with OT_FAULT_LIMIT if TEMP_OT_FAULT + status is set. diff --git a/Documentation/hwmon/max16065 b/Documentation/hwmon/max16065 new file mode 100644 index 000000000000..44b4f61e04f9 --- /dev/null +++ b/Documentation/hwmon/max16065 @@ -0,0 +1,98 @@ +Kernel driver max16065 +====================== + +Supported chips: + * Maxim MAX16065, MAX16066 + Prefixes: 'max16065', 'max16066' + Addresses scanned: - + Datasheet: + http://datasheets.maxim-ic.com/en/ds/MAX16065-MAX16066.pdf + * Maxim MAX16067 + Prefix: 'max16067' + Addresses scanned: - + Datasheet: + http://datasheets.maxim-ic.com/en/ds/MAX16067.pdf + * Maxim MAX16068 + Prefix: 'max16068' + Addresses scanned: - + Datasheet: + http://datasheets.maxim-ic.com/en/ds/MAX16068.pdf + * Maxim MAX16070/MAX16071 + Prefixes: 'max16070', 'max16071' + Addresses scanned: - + Datasheet: + http://datasheets.maxim-ic.com/en/ds/MAX16070-MAX16071.pdf + + +Author: Guenter Roeck + + +Description +----------- + +[From datasheets] The MAX16065/MAX16066 flash-configurable system managers +monitor and sequence multiple system voltages. The MAX16065/MAX16066 can also +accurately monitor (+/-2.5%) one current channel using a dedicated high-side +current-sense amplifier. The MAX16065 manages up to twelve system voltages +simultaneously, and the MAX16066 manages up to eight supply voltages. + +The MAX16067 flash-configurable system manager monitors and sequences multiple +system voltages. The MAX16067 manages up to six system voltages simultaneously. + +The MAX16068 flash-configurable system manager monitors and manages up to six +system voltages simultaneously. + +The MAX16070/MAX16071 flash-configurable system monitors supervise multiple +system voltages. The MAX16070/MAX16071 can also accurately monitor (+/-2.5%) +one current channel using a dedicated high-side current-sense amplifier. The +MAX16070 monitors up to twelve system voltages simultaneously, and the MAX16071 +monitors up to eight supply voltages. + +Each monitored channel has its own low and high critical limits. MAX16065, +MAX16066, MAX16070, and MAX16071 support an additional limit which is +configurable as either low or high secondary limit. MAX16065, MAX16066, +MAX16070, and MAX16071 also support supply current monitoring. + + +Usage Notes +----------- + +This driver does not probe for devices, since there is no register which +can be safely used to identify the chip. You will have to instantiate +the devices explicitly. Please see Documentation/i2c/instantiating-devices for +details. + + +Sysfs entries +------------- + +in[0-11]_input Input voltage measurements. + +in12_input Voltage on CSP (Current Sense Positive) pin. + Only if the chip supports current sensing and if + current sensing is enabled. + +in[0-11]_min Low warning limit. + Supported on MAX16065, MAX16066, MAX16070, and MAX16071 + only. + +in[0-11]_max High warning limit. + Supported on MAX16065, MAX16066, MAX16070, and MAX16071 + only. + + Either low or high warning limits are supported + (depending on chip configuration), but not both. + +in[0-11]_lcrit Low critical limit. + +in[0-11]_crit High critical limit. + +in[0-11]_alarm Input voltage alarm. + +curr1_input Current sense input; only if the chip supports current + sensing and if current sensing is enabled. + Displayed current assumes 0.001 Ohm current sense + resistor. + +curr1_alarm Overcurrent alarm; only if the chip supports current + sensing and if current sensing is enabled. diff --git a/Documentation/hwmon/max34440 b/Documentation/hwmon/max34440 new file mode 100644 index 000000000000..6c525dd07d59 --- /dev/null +++ b/Documentation/hwmon/max34440 @@ -0,0 +1,79 @@ +Kernel driver max34440 +====================== + +Supported chips: + * Maxim MAX34440 + Prefixes: 'max34440' + Addresses scanned: - + Datasheet: http://datasheets.maxim-ic.com/en/ds/MAX34440.pdf + * Maxim MAX34441 + PMBus 5-Channel Power-Supply Manager and Intelligent Fan Controller + Prefixes: 'max34441' + Addresses scanned: - + Datasheet: http://datasheets.maxim-ic.com/en/ds/MAX34441.pdf + +Author: Guenter Roeck + + +Description +----------- + +This driver supports hardware montoring for Maxim MAX34440 PMBus 6-Channel +Power-Supply Manager and MAX34441 PMBus 5-Channel Power-Supply Manager +and Intelligent Fan Controller. + +The driver is a client driver to the core PMBus driver. Please see +Documentation/hwmon/pmbus for details on PMBus client drivers. + + +Usage Notes +----------- + +This driver does not auto-detect devices. You will have to instantiate the +devices explicitly. Please see Documentation/i2c/instantiating-devices for +details. + + +Platform data support +--------------------- + +The driver supports standard PMBus driver platform data. + + +Sysfs entries +------------- + +The following attributes are supported. Limits are read-write; all other +attributes are read-only. + +in[1-6]_label "vout[1-6]". +in[1-6]_input Measured voltage. From READ_VOUT register. +in[1-6]_min Minumum Voltage. From VOUT_UV_WARN_LIMIT register. +in[1-6]_max Maximum voltage. From VOUT_OV_WARN_LIMIT register. +in[1-6]_lcrit Critical minumum Voltage. VOUT_UV_FAULT_LIMIT register. +in[1-6]_crit Critical maximum voltage. From VOUT_OV_FAULT_LIMIT register. +in[1-6]_min_alarm Voltage low alarm. From VOLTAGE_UV_WARNING status. +in[1-6]_max_alarm Voltage high alarm. From VOLTAGE_OV_WARNING status. +in[1-6]_lcrit_alarm Voltage critical low alarm. From VOLTAGE_UV_FAULT status. +in[1-6]_crit_alarm Voltage critical high alarm. From VOLTAGE_OV_FAULT status. + +curr[1-6]_label "iout[1-6]". +curr[1-6]_input Measured current. From READ_IOUT register. +curr[1-6]_max Maximum current. From IOUT_OC_WARN_LIMIT register. +curr[1-6]_crit Critical maximum current. From IOUT_OC_FAULT_LIMIT register. +curr[1-6]_max_alarm Current high alarm. From IOUT_OC_WARNING status. +curr[1-6]_crit_alarm Current critical high alarm. From IOUT_OC_FAULT status. + + in6 and curr6 attributes only exist for MAX34440. + +temp[1-8]_input Measured temperatures. From READ_TEMPERATURE_1 register. + temp1 is the chip's internal temperature. temp2..temp5 + are remote I2C temperature sensors. For MAX34441, temp6 + is a remote thermal-diode sensor. For MAX34440, temp6..8 + are remote I2C temperature sensors. +temp[1-8]_max Maximum temperature. From OT_WARN_LIMIT register. +temp[1-8]_crit Critical high temperature. From OT_FAULT_LIMIT register. +temp[1-8]_max_alarm Temperature high alarm. +temp[1-8]_crit_alarm Temperature critical high alarm. + + temp7 and temp8 attributes only exist for MAX34440. diff --git a/Documentation/hwmon/max6642 b/Documentation/hwmon/max6642 new file mode 100644 index 000000000000..afbd3e4942e2 --- /dev/null +++ b/Documentation/hwmon/max6642 @@ -0,0 +1,21 @@ +Kernel driver max6642 +===================== + +Supported chips: + * Maxim MAX6642 + Prefix: 'max6642' + Addresses scanned: I2C 0x48-0x4f + Datasheet: Publicly available at the Maxim website + http://datasheets.maxim-ic.com/en/ds/MAX6642.pdf + +Authors: + Per Dalen + +Description +----------- + +The MAX6642 is a digital temperature sensor. It senses its own temperature as +well as the temperature on one external diode. + +All temperature values are given in degrees Celsius. Resolution +is 0.25 degree for the local temperature and for the remote temperature. diff --git a/Documentation/hwmon/max6650 b/Documentation/hwmon/max6650 index c565650fcfc6..58d9644a2bde 100644 --- a/Documentation/hwmon/max6650 +++ b/Documentation/hwmon/max6650 @@ -2,9 +2,13 @@ Kernel driver max6650 ===================== Supported chips: - * Maxim 6650 / 6651 + * Maxim MAX6650 Prefix: 'max6650' - Addresses scanned: I2C 0x1b, 0x1f, 0x48, 0x4b + Addresses scanned: none + Datasheet: http://pdfserv.maxim-ic.com/en/ds/MAX6650-MAX6651.pdf + * Maxim MAX6651 + Prefix: 'max6651' + Addresses scanned: none Datasheet: http://pdfserv.maxim-ic.com/en/ds/MAX6650-MAX6651.pdf Authors: @@ -15,10 +19,10 @@ Authors: Description ----------- -This driver implements support for the Maxim 6650/6651 +This driver implements support for the Maxim MAX6650 and MAX6651. -The 2 devices are very similar, but the Maxim 6550 has a reduced feature -set, e.g. only one fan-input, instead of 4 for the 6651. +The 2 devices are very similar, but the MAX6550 has a reduced feature +set, e.g. only one fan-input, instead of 4 for the MAX6651. The driver is not able to distinguish between the 2 devices. @@ -36,6 +40,13 @@ fan1_div rw sets the speed range the inputs can handle. Legal values are 1, 2, 4, and 8. Use lower values for faster fans. +Usage notes +----------- + +This driver does not auto-detect devices. You will have to instantiate the +devices explicitly. Please see Documentation/i2c/instantiating-devices for +details. + Module parameters ----------------- diff --git a/Documentation/hwmon/max8688 b/Documentation/hwmon/max8688 new file mode 100644 index 000000000000..0ddd3a412030 --- /dev/null +++ b/Documentation/hwmon/max8688 @@ -0,0 +1,69 @@ +Kernel driver max8688 +===================== + +Supported chips: + * Maxim MAX8688 + Prefix: 'max8688' + Addresses scanned: - + Datasheet: http://datasheets.maxim-ic.com/en/ds/MAX8688.pdf + +Author: Guenter Roeck + + +Description +----------- + +This driver supports hardware montoring for Maxim MAX8688 Digital Power-Supply +Controller/Monitor with PMBus Interface. + +The driver is a client driver to the core PMBus driver. Please see +Documentation/hwmon/pmbus for details on PMBus client drivers. + + +Usage Notes +----------- + +This driver does not auto-detect devices. You will have to instantiate the +devices explicitly. Please see Documentation/i2c/instantiating-devices for +details. + + +Platform data support +--------------------- + +The driver supports standard PMBus driver platform data. + + +Sysfs entries +------------- + +The following attributes are supported. Limits are read-write; all other +attributes are read-only. + +in1_label "vout1" +in1_input Measured voltage. From READ_VOUT register. +in1_min Minumum Voltage. From VOUT_UV_WARN_LIMIT register. +in1_max Maximum voltage. From VOUT_OV_WARN_LIMIT register. +in1_lcrit Critical minumum Voltage. VOUT_UV_FAULT_LIMIT register. +in1_crit Critical maximum voltage. From VOUT_OV_FAULT_LIMIT register. +in1_min_alarm Voltage low alarm. From VOLTAGE_UV_WARNING status. +in1_max_alarm Voltage high alarm. From VOLTAGE_OV_WARNING status. +in1_lcrit_alarm Voltage critical low alarm. From VOLTAGE_UV_FAULT status. +in1_crit_alarm Voltage critical high alarm. From VOLTAGE_OV_FAULT status. + +curr1_label "iout1" +curr1_input Measured current. From READ_IOUT register. +curr1_max Maximum current. From IOUT_OC_WARN_LIMIT register. +curr1_crit Critical maximum current. From IOUT_OC_FAULT_LIMIT register. +curr1_max_alarm Current high alarm. From IOUT_OC_WARN_LIMIT register. +curr1_crit_alarm Current critical high alarm. From IOUT_OC_FAULT status. + +temp1_input Measured temperature. From READ_TEMPERATURE_1 register. +temp1_max Maximum temperature. From OT_WARN_LIMIT register. +temp1_crit Critical high temperature. From OT_FAULT_LIMIT register. +temp1_max_alarm Chip temperature high alarm. Set by comparing + READ_TEMPERATURE_1 with OT_WARN_LIMIT if TEMP_OT_WARNING + status is set. +temp1_crit_alarm Chip temperature critical high alarm. Set by comparing + READ_TEMPERATURE_1 with OT_FAULT_LIMIT if TEMP_OT_FAULT + status is set. diff --git a/Documentation/hwmon/pkgtemp b/Documentation/hwmon/pkgtemp deleted file mode 100644 index c8e1fb0fadd3..000000000000 --- a/Documentation/hwmon/pkgtemp +++ /dev/null @@ -1,36 +0,0 @@ -Kernel driver pkgtemp -====================== - -Supported chips: - * Intel family - Prefix: 'pkgtemp' - CPUID: - Datasheet: Intel 64 and IA-32 Architectures Software Developer's Manual - Volume 3A: System Programming Guide - -Author: Fenghua Yu - -Description ------------ - -This driver permits reading package level temperature sensor embedded inside -Intel CPU package. The sensors can be in core, uncore, memory controller, or -other components in a package. The feature is first implemented in Intel Sandy -Bridge platform. - -Temperature is measured in degrees Celsius and measurement resolution is -1 degree C. Valid temperatures are from 0 to TjMax degrees C, because the actual -value of temperature register is in fact a delta from TjMax. - -Temperature known as TjMax is the maximum junction temperature of package. -We get this from MSR_IA32_TEMPERATURE_TARGET. If the MSR is not accessible, -we define TjMax as 100 degrees Celsius. At this temperature, protection -mechanism will perform actions to forcibly cool down the package. Alarm -may be raised, if the temperature grows enough (more than TjMax) to trigger -the Out-Of-Spec bit. Following table summarizes the exported sysfs files: - -temp1_input - Package temperature (in millidegrees Celsius). -temp1_max - All cooling devices should be turned on. -temp1_crit - Maximum junction temperature (in millidegrees Celsius). -temp1_crit_alarm - Set when Out-of-spec bit is set, never clears. - Correct CPU operation is no longer guaranteed. diff --git a/Documentation/hwmon/pmbus b/Documentation/hwmon/pmbus index dc4933e96344..5e462fc7f99b 100644 --- a/Documentation/hwmon/pmbus +++ b/Documentation/hwmon/pmbus @@ -13,26 +13,6 @@ Supported chips: Prefix: 'ltc2978' Addresses scanned: - Datasheet: http://cds.linear.com/docs/Datasheet/2978fa.pdf - * Maxim MAX16064 - Quad Power-Supply Controller - Prefix: 'max16064' - Addresses scanned: - - Datasheet: http://datasheets.maxim-ic.com/en/ds/MAX16064.pdf - * Maxim MAX34440 - PMBus 6-Channel Power-Supply Manager - Prefixes: 'max34440' - Addresses scanned: - - Datasheet: http://datasheets.maxim-ic.com/en/ds/MAX34440.pdf - * Maxim MAX34441 - PMBus 5-Channel Power-Supply Manager and Intelligent Fan Controller - Prefixes: 'max34441' - Addresses scanned: - - Datasheet: http://datasheets.maxim-ic.com/en/ds/MAX34441.pdf - * Maxim MAX8688 - Digital Power-Supply Controller/Monitor - Prefix: 'max8688' - Addresses scanned: - - Datasheet: http://datasheets.maxim-ic.com/en/ds/MAX8688.pdf * Generic PMBus devices Prefix: 'pmbus' Addresses scanned: - @@ -175,11 +155,13 @@ currX_crit Critical maximum current. From IIN_OC_FAULT_LIMIT or IOUT_OC_FAULT_LIMIT register. currX_alarm Current high alarm. From IIN_OC_WARNING or IOUT_OC_WARNING status. +currX_max_alarm Current high alarm. + From IIN_OC_WARN_LIMIT or IOUT_OC_WARN_LIMIT status. currX_lcrit_alarm Output current critical low alarm. From IOUT_UC_FAULT status. currX_crit_alarm Current critical high alarm. From IIN_OC_FAULT or IOUT_OC_FAULT status. -currX_label "iin" or "vinY" +currX_label "iin" or "ioutY" powerX_input Measured power. From READ_PIN or READ_POUT register. powerX_cap Output power cap. From POUT_MAX register. @@ -193,13 +175,13 @@ powerX_crit_alarm Output power critical high alarm. From POUT_OP_FAULT status. powerX_label "pin" or "poutY" -tempX_input Measured tempererature. +tempX_input Measured temperature. From READ_TEMPERATURE_X register. -tempX_min Mimimum tempererature. From UT_WARN_LIMIT register. -tempX_max Maximum tempererature. From OT_WARN_LIMIT register. -tempX_lcrit Critical low tempererature. +tempX_min Mimimum temperature. From UT_WARN_LIMIT register. +tempX_max Maximum temperature. From OT_WARN_LIMIT register. +tempX_lcrit Critical low temperature. From UT_FAULT_LIMIT register. -tempX_crit Critical high tempererature. +tempX_crit Critical high temperature. From OT_FAULT_LIMIT register. tempX_min_alarm Chip temperature low alarm. Set by comparing READ_TEMPERATURE_X with UT_WARN_LIMIT if diff --git a/Documentation/hwmon/sht15 b/Documentation/hwmon/sht15 new file mode 100644 index 000000000000..02850bdfac18 --- /dev/null +++ b/Documentation/hwmon/sht15 @@ -0,0 +1,74 @@ +Kernel driver sht15 +=================== + +Authors: + * Wouter Horre + * Jonathan Cameron + * Vivien Didelot + * Jerome Oufella + +Supported chips: + * Sensirion SHT10 + Prefix: 'sht10' + + * Sensirion SHT11 + Prefix: 'sht11' + + * Sensirion SHT15 + Prefix: 'sht15' + + * Sensirion SHT71 + Prefix: 'sht71' + + * Sensirion SHT75 + Prefix: 'sht75' + +Datasheet: Publicly available at the Sensirion website +http://www.sensirion.ch/en/pdf/product_information/Datasheet-humidity-sensor-SHT1x.pdf + +Description +----------- + +The SHT10, SHT11, SHT15, SHT71, and SHT75 are humidity and temperature +sensors. + +The devices communicate using two GPIO lines. + +Supported resolutions for the measurements are 14 bits for temperature and 12 +bits for humidity, or 12 bits for temperature and 8 bits for humidity. + +The humidity calibration coefficients are programmed into an OTP memory on the +chip. These coefficients are used to internally calibrate the signals from the +sensors. Disabling the reload of those coefficients allows saving 10ms for each +measurement and decrease power consumption, while loosing on precision. + +Some options may be set directly in the sht15_platform_data structure +or via sysfs attributes. + +Notes: + * The regulator supply name is set to "vcc". + * If a CRC validation fails, a soft reset command is sent, which resets + status register to its hardware default value, but the driver will try to + restore the previous device configuration. + +Platform data +------------- + +* checksum: + set it to true to enable CRC validation of the readings (default to false). +* no_otp_reload: + flag to indicate not to reload from OTP (default to false). +* low_resolution: + flag to indicate the temp/humidity resolution to use (default to false). + +Sysfs interface +--------------- + +* temp1_input: temperature input +* humidity1_input: humidity input +* heater_enable: write 1 in this attribute to enable the on-chip heater, + 0 to disable it. Be careful not to enable the heater + for too long. +* temp1_fault: if 1, this means that the voltage is low (below 2.47V) and + measurement may be invalid. +* humidity1_fault: same as temp1_fault. diff --git a/Documentation/hwmon/smm665 b/Documentation/hwmon/smm665 index 3820fc9ca52d..59e316140542 100644 --- a/Documentation/hwmon/smm665 +++ b/Documentation/hwmon/smm665 @@ -150,8 +150,8 @@ in8_crit_alarm Channel F critical alarm in9_crit_alarm AIN1 critical alarm in10_crit_alarm AIN2 critical alarm -temp1_input Chip tempererature -temp1_min Mimimum chip tempererature -temp1_max Maximum chip tempererature -temp1_crit Critical chip tempererature +temp1_input Chip temperature +temp1_min Mimimum chip temperature +temp1_max Maximum chip temperature +temp1_crit Critical chip temperature temp1_crit_alarm Temperature critical alarm diff --git a/Documentation/hwmon/submitting-patches b/Documentation/hwmon/submitting-patches new file mode 100644 index 000000000000..86f42e8e9e49 --- /dev/null +++ b/Documentation/hwmon/submitting-patches @@ -0,0 +1,109 @@ + How to Get Your Patch Accepted Into the Hwmon Subsystem + ------------------------------------------------------- + +This text is is a collection of suggestions for people writing patches or +drivers for the hwmon subsystem. Following these suggestions will greatly +increase the chances of your change being accepted. + + +1. General +---------- + +* It should be unnecessary to mention, but please read and follow + Documentation/SubmitChecklist + Documentation/SubmittingDrivers + Documentation/SubmittingPatches + Documentation/CodingStyle + +* If your patch generates checkpatch warnings, please refrain from explanations + such as "I don't like that coding style". Keep in mind that each unnecessary + warning helps hiding a real problem. If you don't like the kernel coding + style, don't write kernel drivers. + +* Please test your patch thoroughly. We are not your test group. + Sometimes a patch can not or not completely be tested because of missing + hardware. In such cases, you should test-build the code on at least one + architecture. If run-time testing was not achieved, it should be written + explicitly below the patch header. + +* If your patch (or the driver) is affected by configuration options such as + CONFIG_SMP or CONFIG_HOTPLUG, make sure it compiles for all configuration + variants. + + +2. Adding functionality to existing drivers +------------------------------------------- + +* Make sure the documentation in Documentation/hwmon/ is up to + date. + +* Make sure the information in Kconfig is up to date. + +* If the added functionality requires some cleanup or structural changes, split + your patch into a cleanup part and the actual addition. This makes it easier + to review your changes, and to bisect any resulting problems. + +* Never mix bug fixes, cleanup, and functional enhancements in a single patch. + + +3. New drivers +-------------- + +* Running your patch or driver file(s) through checkpatch does not mean its + formatting is clean. If unsure about formatting in your new driver, run it + through Lindent. Lindent is not perfect, and you may have to do some minor + cleanup, but it is a good start. + +* Consider adding yourself to MAINTAINERS. + +* Document the driver in Documentation/hwmon/. + +* Add the driver to Kconfig and Makefile in alphabetical order. + +* Make sure that all dependencies are listed in Kconfig. For new drivers, it + is most likely prudent to add a dependency on EXPERIMENTAL. + +* Avoid forward declarations if you can. Rearrange the code if necessary. + +* Avoid calculations in macros and macro-generated functions. While such macros + may save a line or so in the source, it obfuscates the code and makes code + review more difficult. It may also result in code which is more complicated + than necessary. Use inline functions or just regular functions instead. + +* If the driver has a detect function, make sure it is silent. Debug messages + and messages printed after a successful detection are acceptable, but it + must not print messages such as "Chip XXX not found/supported". + + Keep in mind that the detect function will run for all drivers supporting an + address if a chip is detected on that address. Unnecessary messages will just + pollute the kernel log and not provide any value. + +* Provide a detect function if and only if a chip can be detected reliably. + +* Avoid writing to chip registers in the detect function. If you have to write, + only do it after you have already gathered enough data to be certain that the + detection is going to be successful. + + Keep in mind that the chip might not be what your driver believes it is, and + writing to it might cause a bad misconfiguration. + +* Make sure there are no race conditions in the probe function. Specifically, + completely initialize your chip first, then create sysfs entries and register + with the hwmon subsystem. + +* Do not provide support for deprecated sysfs attributes. + +* Do not create non-standard attributes unless really needed. If you have to use + non-standard attributes, or you believe you do, discuss it on the mailing list + first. Either case, provide a detailed explanation why you need the + non-standard attribute(s). + Standard attributes are specified in Documentation/hwmon/sysfs-interface. + +* When deciding which sysfs attributes to support, look at the chip's + capabilities. While we do not expect your driver to support everything the + chip may offer, it should at least support all limits and alarms. + +* Last but not least, please check if a driver for your chip already exists + before starting to write a new driver. Especially for temperature sensors, + new chips are often variants of previously released chips. In some cases, + a presumably new chip may simply have been relabeled. diff --git a/Documentation/hwmon/ucd9000 b/Documentation/hwmon/ucd9000 new file mode 100644 index 000000000000..40ca6db50c48 --- /dev/null +++ b/Documentation/hwmon/ucd9000 @@ -0,0 +1,110 @@ +Kernel driver ucd9000 +===================== + +Supported chips: + * TI UCD90120, UCD90124, UCD9090, and UCD90910 + Prefixes: 'ucd90120', 'ucd90124', 'ucd9090', 'ucd90910' + Addresses scanned: - + Datasheets: + http://focus.ti.com/lit/ds/symlink/ucd90120.pdf + http://focus.ti.com/lit/ds/symlink/ucd90124.pdf + http://focus.ti.com/lit/ds/symlink/ucd9090.pdf + http://focus.ti.com/lit/ds/symlink/ucd90910.pdf + +Author: Guenter Roeck + + +Description +----------- + +From datasheets: + +The UCD90120 Power Supply Sequencer and System Health Monitor monitors and +sequences up to 12 independent voltage rails. The device integrates a 12-bit +ADC with a 2.5V internal reference for monitoring up to 13 power supply voltage, +current, or temperature inputs. + +The UCD90124 is a 12-rail PMBus/I2C addressable power-supply sequencer and +system-health monitor. The device integrates a 12-bit ADC for monitoring up to +13 power-supply voltage, current, or temperature inputs. Twenty-six GPIO pins +can be used for power supply enables, power-on reset signals, external +interrupts, cascading, or other system functions. Twelve of these pins offer PWM +functionality. Using these pins, the UCD90124 offers support for fan control, +margining, and general-purpose PWM functions. + +The UCD9090 is a 10-rail PMBus/I2C addressable power-supply sequencer and +monitor. The device integrates a 12-bit ADC for monitoring up to 10 power-supply +voltage inputs. Twenty-three GPIO pins can be used for power supply enables, +power-on reset signals, external interrupts, cascading, or other system +functions. Ten of these pins offer PWM functionality. Using these pins, the +UCD9090 offers support for margining, and general-purpose PWM functions. + +The UCD90910 is a ten-rail I2C / PMBus addressable power-supply sequencer and +system-health monitor. The device integrates a 12-bit ADC for monitoring up to +13 power-supply voltage, current, or temperature inputs. + +This driver is a client driver to the core PMBus driver. Please see +Documentation/hwmon/pmbus for details on PMBus client drivers. + + +Usage Notes +----------- + +This driver does not auto-detect devices. You will have to instantiate the +devices explicitly. Please see Documentation/i2c/instantiating-devices for +details. + + +Platform data support +--------------------- + +The driver supports standard PMBus driver platform data. Please see +Documentation/hwmon/pmbus for details. + + +Sysfs entries +------------- + +The following attributes are supported. Limits are read-write; all other +attributes are read-only. + +in[1-12]_label "vout[1-12]". +in[1-12]_input Measured voltage. From READ_VOUT register. +in[1-12]_min Minumum Voltage. From VOUT_UV_WARN_LIMIT register. +in[1-12]_max Maximum voltage. From VOUT_OV_WARN_LIMIT register. +in[1-12]_lcrit Critical minumum Voltage. VOUT_UV_FAULT_LIMIT register. +in[1-12]_crit Critical maximum voltage. From VOUT_OV_FAULT_LIMIT register. +in[1-12]_min_alarm Voltage low alarm. From VOLTAGE_UV_WARNING status. +in[1-12]_max_alarm Voltage high alarm. From VOLTAGE_OV_WARNING status. +in[1-12]_lcrit_alarm Voltage critical low alarm. From VOLTAGE_UV_FAULT status. +in[1-12]_crit_alarm Voltage critical high alarm. From VOLTAGE_OV_FAULT status. + +curr[1-12]_label "iout[1-12]". +curr[1-12]_input Measured current. From READ_IOUT register. +curr[1-12]_max Maximum current. From IOUT_OC_WARN_LIMIT register. +curr[1-12]_lcrit Critical minumum output current. From IOUT_UC_FAULT_LIMIT + register. +curr[1-12]_crit Critical maximum current. From IOUT_OC_FAULT_LIMIT register. +curr[1-12]_max_alarm Current high alarm. From IOUT_OC_WARNING status. +curr[1-12]_crit_alarm Current critical high alarm. From IOUT_OC_FAULT status. + + For each attribute index, either voltage or current is + reported, but not both. If voltage or current is + reported depends on the chip configuration. + +temp[1-2]_input Measured temperatures. From READ_TEMPERATURE_1 and + READ_TEMPERATURE_2 registers. +temp[1-2]_max Maximum temperature. From OT_WARN_LIMIT register. +temp[1-2]_crit Critical high temperature. From OT_FAULT_LIMIT register. +temp[1-2]_max_alarm Temperature high alarm. +temp[1-2]_crit_alarm Temperature critical high alarm. + +fan[1-4]_input Fan RPM. +fan[1-4]_alarm Fan alarm. +fan[1-4]_fault Fan fault. + + Fan attributes are only available on chips supporting + fan control (UCD90124, UCD90910). Attribute files are + created only for enabled fans. + Note that even though UCD90910 supports up to 10 fans, + only up to four fans are currently supported. diff --git a/Documentation/hwmon/ucd9200 b/Documentation/hwmon/ucd9200 new file mode 100644 index 000000000000..3c58607f72fe --- /dev/null +++ b/Documentation/hwmon/ucd9200 @@ -0,0 +1,112 @@ +Kernel driver ucd9200 +===================== + +Supported chips: + * TI UCD9220, UCD9222, UCD9224, UCD9240, UCD9244, UCD9246, and UCD9248 + Prefixes: 'ucd9220', 'ucd9222', 'ucd9224', 'ucd9240', 'ucd9244', 'ucd9246', + 'ucd9248' + Addresses scanned: - + Datasheets: + http://focus.ti.com/lit/ds/symlink/ucd9220.pdf + http://focus.ti.com/lit/ds/symlink/ucd9222.pdf + http://focus.ti.com/lit/ds/symlink/ucd9224.pdf + http://focus.ti.com/lit/ds/symlink/ucd9240.pdf + http://focus.ti.com/lit/ds/symlink/ucd9244.pdf + http://focus.ti.com/lit/ds/symlink/ucd9246.pdf + http://focus.ti.com/lit/ds/symlink/ucd9248.pdf + +Author: Guenter Roeck + + +Description +----------- + +[From datasheets] UCD9220, UCD9222, UCD9224, UCD9240, UCD9244, UCD9246, and +UCD9248 are multi-rail, multi-phase synchronous buck digital PWM controllers +designed for non-isolated DC/DC power applications. The devices integrate +dedicated circuitry for DC/DC loop management with flash memory and a serial +interface to support configuration, monitoring and management. + +This driver is a client driver to the core PMBus driver. Please see +Documentation/hwmon/pmbus for details on PMBus client drivers. + + +Usage Notes +----------- + +This driver does not auto-detect devices. You will have to instantiate the +devices explicitly. Please see Documentation/i2c/instantiating-devices for +details. + + +Platform data support +--------------------- + +The driver supports standard PMBus driver platform data. Please see +Documentation/hwmon/pmbus for details. + + +Sysfs entries +------------- + +The following attributes are supported. Limits are read-write; all other +attributes are read-only. + +in1_label "vin". +in1_input Measured voltage. From READ_VIN register. +in1_min Minumum Voltage. From VIN_UV_WARN_LIMIT register. +in1_max Maximum voltage. From VIN_OV_WARN_LIMIT register. +in1_lcrit Critical minumum Voltage. VIN_UV_FAULT_LIMIT register. +in1_crit Critical maximum voltage. From VIN_OV_FAULT_LIMIT register. +in1_min_alarm Voltage low alarm. From VIN_UV_WARNING status. +in1_max_alarm Voltage high alarm. From VIN_OV_WARNING status. +in1_lcrit_alarm Voltage critical low alarm. From VIN_UV_FAULT status. +in1_crit_alarm Voltage critical high alarm. From VIN_OV_FAULT status. + +in[2-5]_label "vout[1-4]". +in[2-5]_input Measured voltage. From READ_VOUT register. +in[2-5]_min Minumum Voltage. From VOUT_UV_WARN_LIMIT register. +in[2-5]_max Maximum voltage. From VOUT_OV_WARN_LIMIT register. +in[2-5]_lcrit Critical minumum Voltage. VOUT_UV_FAULT_LIMIT register. +in[2-5]_crit Critical maximum voltage. From VOUT_OV_FAULT_LIMIT register. +in[2-5]_min_alarm Voltage low alarm. From VOLTAGE_UV_WARNING status. +in[2-5]_max_alarm Voltage high alarm. From VOLTAGE_OV_WARNING status. +in[2-5]_lcrit_alarm Voltage critical low alarm. From VOLTAGE_UV_FAULT status. +in[2-5]_crit_alarm Voltage critical high alarm. From VOLTAGE_OV_FAULT status. + +curr1_label "iin". +curr1_input Measured current. From READ_IIN register. + +curr[2-5]_label "iout[1-4]". +curr[2-5]_input Measured current. From READ_IOUT register. +curr[2-5]_max Maximum current. From IOUT_OC_WARN_LIMIT register. +curr[2-5]_lcrit Critical minumum output current. From IOUT_UC_FAULT_LIMIT + register. +curr[2-5]_crit Critical maximum current. From IOUT_OC_FAULT_LIMIT register. +curr[2-5]_max_alarm Current high alarm. From IOUT_OC_WARNING status. +curr[2-5]_crit_alarm Current critical high alarm. From IOUT_OC_FAULT status. + +power1_input Measured input power. From READ_PIN register. +power1_label "pin" + +power[2-5]_input Measured output power. From READ_POUT register. +power[2-5]_label "pout[1-4]" + + The number of output voltage, current, and power + attribute sets is determined by the number of enabled + rails. See chip datasheets for details. + +temp[1-5]_input Measured temperatures. From READ_TEMPERATURE_1 and + READ_TEMPERATURE_2 registers. + temp1 is the chip internal temperature. temp[2-5] are + rail temperatures. temp[2-5] attributes are only + created for enabled rails. See chip datasheets for + details. +temp[1-5]_max Maximum temperature. From OT_WARN_LIMIT register. +temp[1-5]_crit Critical high temperature. From OT_FAULT_LIMIT register. +temp[1-5]_max_alarm Temperature high alarm. +temp[1-5]_crit_alarm Temperature critical high alarm. + +fan1_input Fan RPM. ucd9240 only. +fan1_alarm Fan alarm. ucd9240 only. +fan1_fault Fan fault. ucd9240 only. diff --git a/Documentation/i2c/busses/i2c-i801 b/Documentation/i2c/busses/i2c-i801 index 6df69765ccb7..2871fd500349 100644 --- a/Documentation/i2c/busses/i2c-i801 +++ b/Documentation/i2c/busses/i2c-i801 @@ -19,6 +19,7 @@ Supported adapters: * Intel 6 Series (PCH) * Intel Patsburg (PCH) * Intel DH89xxCC (PCH) + * Intel Panther Point (PCH) Datasheets: Publicly available at the Intel website On Intel Patsburg and later chipsets, both the normal host SMBus controller diff --git a/Documentation/i2c/writing-clients b/Documentation/i2c/writing-clients index 5ebf5af1d716..5aa53374ea2a 100644 --- a/Documentation/i2c/writing-clients +++ b/Documentation/i2c/writing-clients @@ -38,7 +38,7 @@ static struct i2c_driver foo_driver = { .name = "foo", }, - .id_table = foo_ids, + .id_table = foo_idtable, .probe = foo_probe, .remove = foo_remove, /* if device autodetection is needed: */ diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt index a0a5d82b6b0b..3a46e360496d 100644 --- a/Documentation/ioctl/ioctl-number.txt +++ b/Documentation/ioctl/ioctl-number.txt @@ -166,7 +166,6 @@ Code Seq#(hex) Include File Comments 'T' all arch/x86/include/asm/ioctls.h conflict! 'T' C0-DF linux/if_tun.h conflict! 'U' all sound/asound.h conflict! -'U' 00-0F drivers/media/video/uvc/uvcvideo.h conflict! 'U' 00-CF linux/uinput.h conflict! 'U' 00-EF linux/usbdevice_fs.h 'U' C0-CF drivers/bluetooth/hci_uart.h @@ -259,6 +258,7 @@ Code Seq#(hex) Include File Comments 't' 80-8F linux/isdn_ppp.h 't' 90 linux/toshiba.h 'u' 00-1F linux/smb_fs.h gone +'u' 20-3F linux/uvcvideo.h USB video class host driver 'v' 00-1F linux/ext2_fs.h conflict! 'v' 00-1F linux/fs.h conflict! 'v' 00-0F linux/sonypi.h conflict! @@ -304,6 +304,7 @@ Code Seq#(hex) Include File Comments 0xB0 all RATIO devices in development: 0xB1 00-1F PPPoX +0xB3 00 linux/mmc/ioctl.h 0xC0 00-0F linux/usb/iowarrior.h 0xCB 00-1F CBM serial IEC bus in development: diff --git a/Documentation/ja_JP/HOWTO b/Documentation/ja_JP/HOWTO index b63301a03811..050d37fe6d40 100644 --- a/Documentation/ja_JP/HOWTO +++ b/Documentation/ja_JP/HOWTO @@ -11,14 +11,14 @@ for non English (read: Japanese) speakers and is not intended as a fork. So if you have any comments or updates for this file, please try to update the original English file first. -Last Updated: 2008/10/24 +Last Updated: 2011/03/31 ================================== これは、 -linux-2.6.28/Documentation/HOWTO +linux-2.6.38/Documentation/HOWTO の和訳です。 翻訳団体: JF プロジェクト < http://www.linux.or.jp/JF/ > -翻訳日: 2008/10/24 +翻訳日: 2011/3/28 翻訳者: Tsugikazu Shibata 校正者: 松倉さん 小林 雅典さん (Masanori Kobayasi) @@ -256,8 +256,8 @@ Linux カーネルの開発プロセスは現在幾つかの異なるメイン - メインの 2.6.x カーネルツリー - 2.6.x.y -stable カーネルツリー - 2.6.x -git カーネルパッチ - - 2.6.x -mm カーネルパッチ - サブシステム毎のカーネルツリーとパッチ + - 統合テストのための 2.6.x -next カーネルツリー 2.6.x カーネルツリー ----------------- @@ -268,9 +268,9 @@ Linux カーネルの開発プロセスは現在幾つかの異なるメイン - 新しいカーネルがリリースされた直後に、2週間の特別期間が設けられ、 この期間中に、メンテナ達は Linus に大きな差分を送ることができます。 - このような差分は通常 -mm カーネルに数週間含まれてきたパッチです。 + このような差分は通常 -next カーネルに数週間含まれてきたパッチです。 大きな変更は git(カーネルのソース管理ツール、詳細は - http://git.or.cz/ 参照) を使って送るのが好ましいやり方ですが、パッ + http://git-scm.com/ 参照) を使って送るのが好ましいやり方ですが、パッ チファイルの形式のまま送るのでも十分です。 - 2週間後、-rc1 カーネルがリリースされ、この後にはカーネル全体の安定 @@ -333,86 +333,44 @@ git リポジトリで管理されているLinus のカーネルツリーの毎 れは -rc カーネルと比べて、パッチが大丈夫かどうかも確認しないで自動的 に生成されるので、より実験的です。 -2.6.x -mm カーネルパッチ ------------------------- - -Andrew Morton によってリリースされる実験的なカーネルパッチ群です。 -Andrew は個別のサブシステムカーネルツリーとパッチを全て集めてきて -linux-kernel メーリングリストで収集された多数のパッチと同時に一つにま -とめます。 -このツリーは新機能とパッチが検証される場となります。ある期間の間パッチ -が -mm に入って価値を証明されたら、Andrew やサブシステムメンテナが、 -メインラインへ入れるように Linus にプッシュします。 - -メインカーネルツリーに含めるために Linus に送る前に、すべての新しいパッ -チが -mm ツリーでテストされることが強く推奨されています。マージウィン -ドウが開く前に -mm ツリーに現れなかったパッチはメインラインにマージさ -れることは困難になります。 - -これらのカーネルは安定して動作すべきシステムとして使うのには適切ではあ -りませんし、カーネルブランチの中でももっとも動作にリスクが高いものです。 - -もしあなたが、カーネル開発プロセスの支援をしたいと思っているのであれば、 -どうぞこれらのカーネルリリースをテストに使ってみて、そしてもし問題があ -れば、またもし全てが正しく動作したとしても、linux-kernel メーリングリ -ストにフィードバックを提供してください。 - -すべての他の実験的パッチに加えて、これらのカーネルは通常リリース時点で -メインラインの -git カーネルに含まれる全ての変更も含んでいます。 - --mm カーネルは決まったスケジュールではリリースされません、しかし通常幾 -つかの -mm カーネル (1 から 3 が普通)が各-rc カーネルの間にリリースさ -れます。 - サブシステム毎のカーネルツリーとパッチ ------------------------------------------- -カーネルの様々な領域で何が起きているかを見られるようにするため、多くの -カーネルサブシステム開発者は彼らの開発ツリーを公開しています。これらの -ツリーは説明したように -mm カーネルリリースに入れ込まれます。 +それぞれのカーネルサブシステムのメンテナ達は --- そして多くのカーネル +サブシステムの開発者達も --- 各自の最新の開発状況をソースリポジトリに +公開しています。そのため、自分とは異なる領域のカーネルで何が起きている +かを他の人が見られるようになっています。開発が早く進んでいる領域では、 +開発者は自身の投稿がどのサブシステムカーネルツリーを元にしているか質問 +されるので、その投稿とすでに進行中の他の作業との衝突が避けられます。 -以下はさまざまなカーネルツリーの中のいくつかのリスト- +大部分のこれらのリポジトリは git ツリーです。しかしその他の SCM や +quilt シリーズとして公開されているパッチキューも使われています。これら +のサブシステムリポジトリのアドレスは MAINTAINERS ファイルにリストされ +ています。これらの多くは http://git.kernel.org/ で参照することができま +す。 - git ツリー- - - Kbuild の開発ツリー、Sam Ravnborg - git.kernel.org:/pub/scm/linux/kernel/git/sam/kbuild.git +提案されたパッチがこのようなサブシステムツリーにコミットされる前に、メー +リングリストで事前にレビューにかけられます(以下の対応するセクションを +参照)。いくつかのカーネルサブシステムでは、このレビューは patchwork +というツールによって追跡されます。Patchwork は web インターフェイスに +よってパッチ投稿の表示、パッチへのコメント付けや改訂などができ、そして +メンテナはパッチに対して、レビュー中、受付済み、拒否というようなマーク +をつけることができます。大部分のこれらの patchwork のサイトは +http://patchwork.kernel.org/ でリストされています。 - - ACPI の開発ツリー、 Len Brown - git.kernel.org:/pub/scm/linux/kernel/git/lenb/linux-acpi-2.6.git +統合テストのための 2.6.x -next カーネルツリー +--------------------------------------------- - - Block の開発ツリー、Jens Axboe - git.kernel.org:/pub/scm/linux/kernel/git/axboe/linux-2.6-block.git +サブシステムツリーの更新内容がメインラインの 2.6.x ツリーにマージされ +る前に、それらは統合テストされる必要があります。この目的のため、実質的 +に全サブシステムツリーからほぼ毎日プルされてできる特別なテスト用のリ +ポジトリが存在します- + http://git.kernel.org/?p=linux/kernel/git/sfr/linux-next.git + http://linux.f-seidel.de/linux-next/pmwiki/ - - DRM の開発ツリー、Dave Airlie - git.kernel.org:/pub/scm/linux/kernel/git/airlied/drm-2.6.git - - - ia64 の開発ツリー、Tony Luck - git.kernel.org:/pub/scm/linux/kernel/git/aegl/linux-2.6.git - - - infiniband, Roland Dreier - git.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git - - - libata, Jeff Garzik - git.kernel.org:/pub/scm/linux/kernel/git/jgarzik/libata-dev.git - - - ネットワークドライバ, Jeff Garzik - git.kernel.org:/pub/scm/linux/kernel/git/jgarzik/netdev-2.6.git - - - pcmcia, Dominik Brodowski - git.kernel.org:/pub/scm/linux/kernel/git/brodo/pcmcia-2.6.git - - - SCSI, James Bottomley - git.kernel.org:/pub/scm/linux/kernel/git/jejb/scsi-misc-2.6.git - - - x86, Ingo Molnar - git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86.git - - quilt ツリー- - - USB, ドライバコアと I2C, Greg Kroah-Hartman - kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/ - - その他のカーネルツリーは http://git.kernel.org/ と MAINTAINERS ファ - イルに一覧表があります。 +このやり方によって、-next カーネルは次のマージ機会でどんなものがメイン +ラインカーネルにマージされるか、おおまかなの展望を提供します。-next +カーネルの実行テストを行う冒険好きなテスターは大いに歓迎されます バグレポート ------------- @@ -673,10 +631,9 @@ Linux カーネルコミュニティは、一度に大量のコードの塊を じところからスタートしたのですから。 Paolo Ciarrocchi に感謝、彼は彼の書いた "Development Process" -(http://linux.tar.bz/articles/2.6-development_process)セクショ -ンをこのテキストの原型にすることを許可してくれました。 -Rundy Dunlap と Gerrit Huizenga はメーリングリストでやるべきこととやっ -てはいけないことのリストを提供してくれました。 +(http://lwn.net/Articles/94386/) セクションをこのテキストの原型にする +ことを許可してくれました。Rundy Dunlap と Gerrit Huizenga はメーリング +リストでやるべきこととやってはいけないことのリストを提供してくれました。 以下の人々のレビュー、コメント、貢献に感謝。 Pat Mochel, Hanna Linder, Randy Dunlap, Kay Sievers, Vojtech Pavlik, Jan Kara, Josh Boyer, Kees Cook, Andrew Morton, Andi diff --git a/Documentation/kbuild/kbuild.txt b/Documentation/kbuild/kbuild.txt index 7c2a89ba674c..68e32bb6bd80 100644 --- a/Documentation/kbuild/kbuild.txt +++ b/Documentation/kbuild/kbuild.txt @@ -201,3 +201,16 @@ KBUILD_ENABLE_EXTRA_GCC_CHECKS -------------------------------------------------- If enabled over the make command line with "W=1", it turns on additional gcc -W... options for more extensive build-time checking. + +KBUILD_BUILD_TIMESTAMP +-------------------------------------------------- +Setting this to a date string overrides the timestamp used in the +UTS_VERSION definition (uname -v in the running kernel). The value has to +be a string that can be passed to date -d. The default value +is the output of the date command at one point during build. + +KBUILD_BUILD_USER, KBUILD_BUILD_HOST +-------------------------------------------------- +These two variables allow to override the user@host string displayed during +boot and in /proc/version. The default value is the output of the commands +whoami and host, respectively. diff --git a/Documentation/kbuild/kconfig-language.txt b/Documentation/kbuild/kconfig-language.txt index b507d61fd41c..44e2649fbb29 100644 --- a/Documentation/kbuild/kconfig-language.txt +++ b/Documentation/kbuild/kconfig-language.txt @@ -113,6 +113,13 @@ applicable everywhere (see syntax). That will limit the usefulness but on the other hand avoid the illegal configurations all over. +- limiting menu display: "visible if" + This attribute is only applicable to menu blocks, if the condition is + false, the menu block is not displayed to the user (the symbols + contained there can still be selected by other symbols, though). It is + similar to a conditional "prompt" attribude for individual menu + entries. Default value of "visible" is true. + - numerical ranges: "range" ["if" ] This allows to limit the range of possible input values for int and hex symbols. The user can only input a value which is larger than @@ -303,7 +310,8 @@ menu: "endmenu" This defines a menu block, see "Menu structure" above for more -information. The only possible options are dependencies. +information. The only possible options are dependencies and "visible" +attributes. if: @@ -381,3 +389,25 @@ config FOO limits FOO to module (=m) or disabled (=n). +Kconfig symbol existence +~~~~~~~~~~~~~~~~~~~~~~~~ +The following two methods produce the same kconfig symbol dependencies +but differ greatly in kconfig symbol existence (production) in the +generated config file. + +case 1: + +config FOO + tristate "about foo" + depends on BAR + +vs. case 2: + +if BAR +config FOO + tristate "about foo" +endif + +In case 1, the symbol FOO will always exist in the config file (given +no other dependencies). In case 2, the symbol FOO will only exist in +the config file if BAR is enabled. diff --git a/Documentation/kbuild/kconfig.txt b/Documentation/kbuild/kconfig.txt index cca46b1a0f6c..c313d71324b4 100644 --- a/Documentation/kbuild/kconfig.txt +++ b/Documentation/kbuild/kconfig.txt @@ -48,11 +48,6 @@ KCONFIG_OVERWRITECONFIG If you set KCONFIG_OVERWRITECONFIG in the environment, Kconfig will not break symlinks when .config is a symlink to somewhere else. -KCONFIG_NOTIMESTAMP --------------------------------------------------- -If this environment variable exists and is non-null, the timestamp line -in generated .config files is omitted. - ______________________________________________________________________ Environment variables for '{allyes/allmod/allno/rand}config' diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt index 5d145bb443c0..47435e56c5da 100644 --- a/Documentation/kbuild/makefiles.txt +++ b/Documentation/kbuild/makefiles.txt @@ -40,11 +40,13 @@ This document describes the Linux kernel Makefiles. --- 6.6 Commands useful for building a boot image --- 6.7 Custom kbuild commands --- 6.8 Preprocessing linker scripts + --- 6.9 Generic header files === 7 Kbuild syntax for exported headers --- 7.1 header-y --- 7.2 objhdr-y --- 7.3 destination-y + --- 7.4 generic-y === 8 Kbuild Variables === 9 Makefile language @@ -499,6 +501,18 @@ more details, with real examples. gcc >= 3.00. For gcc < 3.00, -malign-functions=4 is used. Note: cc-option-align uses KBUILD_CFLAGS for $(CC) options + cc-disable-warning + cc-disable-warning checks if gcc supports a given warning and returns + the commandline switch to disable it. This special function is needed, + because gcc 4.4 and later accept any unknown -Wno-* option and only + warn about it if there is another warning in the source file. + + Example: + KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable) + + In the above example, -Wno-unused-but-set-variable will be added to + KBUILD_CFLAGS only if gcc really accepts it. + cc-version cc-version returns a numerical version of the $(CC) compiler version. The format is where both are two digits. So for example @@ -955,6 +969,11 @@ When kbuild executes, the following steps are followed (roughly): used when linking modules. This is often a linker script. From commandline LDFLAGS_MODULE shall be used (see kbuild.txt). + KBUILD_ARFLAGS Options for $(AR) when creating archives + + $(KBUILD_ARFLAGS) set by the top level Makefile to "D" (deterministic + mode) if this option is supported by $(AR). + --- 6.2 Add prerequisites to archprepare: The archprepare: rule is used to list prerequisites that need to be @@ -1209,6 +1228,14 @@ When kbuild executes, the following steps are followed (roughly): The kbuild infrastructure for *lds file are used in several architecture-specific files. +--- 6.9 Generic header files + + The directory include/asm-generic contains the header files + that may be shared between individual architectures. + The recommended approach how to use a generic header file is + to list the file in the Kbuild file. + See "7.4 generic-y" for further info on syntax etc. + === 7 Kbuild syntax for exported headers The kernel include a set of headers that is exported to userspace. @@ -1265,6 +1292,32 @@ See subsequent chapter for the syntax of the Kbuild file. In the example above all exported headers in the Kbuild file will be located in the directory "include/linux" when exported. + --- 7.4 generic-y + + If an architecture uses a verbatim copy of a header from + include/asm-generic then this is listed in the file + arch/$(ARCH)/include/asm/Kbuild like this: + + Example: + #arch/x86/include/asm/Kbuild + generic-y += termios.h + generic-y += rtc.h + + During the prepare phase of the build a wrapper include + file is generated in the directory: + + arch/$(ARCH)/include/generated/asm + + When a header is exported where the architecture uses + the generic header a similar wrapper is generated as part + of the set of exported headers in the directory: + + usr/include/asm + + The generated wrapper will in both cases look like the following: + + Example: termios.h + #include === 8 Kbuild Variables diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index cc85a9278190..fd248a318211 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -245,7 +245,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. acpi_sleep= [HW,ACPI] Sleep options Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig, - old_ordering, s4_nonvs, sci_force_enable } + old_ordering, nonvs, sci_force_enable } See Documentation/power/video.txt for information on s3_bios and s3_mode. s3_beep is for debugging; it makes the PC's speaker beep @@ -999,7 +999,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. With this option on every unmap_single operation will result in a hardware IOTLB flush operation as opposed to batching them for performance. - + sp_off [Default Off] + By default, super page will be supported if Intel IOMMU + has the capability. With this option, super page will + not be supported. intremap= [X86-64, Intel-IOMMU] Format: { on (default) | off | nosid } on enable Interrupt Remapping (default) @@ -1664,6 +1667,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. noexec=on: enable non-executable mappings (default) noexec=off: disable non-executable mappings + nosmep [X86] + Disable SMEP (Supervisor Mode Execution Protection) + even if it is supported by processor. + noexec32 [X86-64] This affects only 32-bit executables. noexec32=on: enable non-executable mappings (default) @@ -1773,9 +1780,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted. nosoftlockup [KNL] Disable the soft-lockup detector. - noswapaccount [KNL] Disable accounting of swap in memory resource - controller. (See Documentation/cgroups/memory.txt) - nosync [HW,M68K] Disables sync negotiation for all devices. notsc [BUGS=X86-32] Disable Time Stamp Counter @@ -2581,6 +2585,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. bytes of sense data); c = FIX_CAPACITY (decrease the reported device capacity by one sector); + d = NO_READ_DISC_INFO (don't use + READ_DISC_INFO command); + e = NO_READ_CAPACITY_16 (don't use + READ_CAPACITY_16 command); h = CAPACITY_HEURISTICS (decrease the reported device capacity by one sector if the number is odd); @@ -2590,6 +2598,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. unlock ejectable media); m = MAX_SECTORS_64 (don't transfer more than 64 sectors = 32 KB at a time); + n = INITIAL_READ10 (force a retry of the + initial READ(10) command); o = CAPACITY_OK (accept the capacity reported by the device); r = IGNORE_RESIDUE (the device reports diff --git a/Documentation/keys-request-key.txt b/Documentation/keys-request-key.txt deleted file mode 100644 index 69686ad12c66..000000000000 --- a/Documentation/keys-request-key.txt +++ /dev/null @@ -1,202 +0,0 @@ - =================== - KEY REQUEST SERVICE - =================== - -The key request service is part of the key retention service (refer to -Documentation/keys.txt). This document explains more fully how the requesting -algorithm works. - -The process starts by either the kernel requesting a service by calling -request_key*(): - - struct key *request_key(const struct key_type *type, - const char *description, - const char *callout_info); - -or: - - struct key *request_key_with_auxdata(const struct key_type *type, - const char *description, - const char *callout_info, - size_t callout_len, - void *aux); - -or: - - struct key *request_key_async(const struct key_type *type, - const char *description, - const char *callout_info, - size_t callout_len); - -or: - - struct key *request_key_async_with_auxdata(const struct key_type *type, - const char *description, - const char *callout_info, - size_t callout_len, - void *aux); - -Or by userspace invoking the request_key system call: - - key_serial_t request_key(const char *type, - const char *description, - const char *callout_info, - key_serial_t dest_keyring); - -The main difference between the access points is that the in-kernel interface -does not need to link the key to a keyring to prevent it from being immediately -destroyed. The kernel interface returns a pointer directly to the key, and -it's up to the caller to destroy the key. - -The request_key*_with_auxdata() calls are like the in-kernel request_key*() -calls, except that they permit auxiliary data to be passed to the upcaller (the -default is NULL). This is only useful for those key types that define their -own upcall mechanism rather than using /sbin/request-key. - -The two async in-kernel calls may return keys that are still in the process of -being constructed. The two non-async ones will wait for construction to -complete first. - -The userspace interface links the key to a keyring associated with the process -to prevent the key from going away, and returns the serial number of the key to -the caller. - - -The following example assumes that the key types involved don't define their -own upcall mechanisms. If they do, then those should be substituted for the -forking and execution of /sbin/request-key. - - -=========== -THE PROCESS -=========== - -A request proceeds in the following manner: - - (1) Process A calls request_key() [the userspace syscall calls the kernel - interface]. - - (2) request_key() searches the process's subscribed keyrings to see if there's - a suitable key there. If there is, it returns the key. If there isn't, - and callout_info is not set, an error is returned. Otherwise the process - proceeds to the next step. - - (3) request_key() sees that A doesn't have the desired key yet, so it creates - two things: - - (a) An uninstantiated key U of requested type and description. - - (b) An authorisation key V that refers to key U and notes that process A - is the context in which key U should be instantiated and secured, and - from which associated key requests may be satisfied. - - (4) request_key() then forks and executes /sbin/request-key with a new session - keyring that contains a link to auth key V. - - (5) /sbin/request-key assumes the authority associated with key U. - - (6) /sbin/request-key execs an appropriate program to perform the actual - instantiation. - - (7) The program may want to access another key from A's context (say a - Kerberos TGT key). It just requests the appropriate key, and the keyring - search notes that the session keyring has auth key V in its bottom level. - - This will permit it to then search the keyrings of process A with the - UID, GID, groups and security info of process A as if it was process A, - and come up with key W. - - (8) The program then does what it must to get the data with which to - instantiate key U, using key W as a reference (perhaps it contacts a - Kerberos server using the TGT) and then instantiates key U. - - (9) Upon instantiating key U, auth key V is automatically revoked so that it - may not be used again. - -(10) The program then exits 0 and request_key() deletes key V and returns key - U to the caller. - -This also extends further. If key W (step 7 above) didn't exist, key W would -be created uninstantiated, another auth key (X) would be created (as per step -3) and another copy of /sbin/request-key spawned (as per step 4); but the -context specified by auth key X will still be process A, as it was in auth key -V. - -This is because process A's keyrings can't simply be attached to -/sbin/request-key at the appropriate places because (a) execve will discard two -of them, and (b) it requires the same UID/GID/Groups all the way through. - - -==================================== -NEGATIVE INSTANTIATION AND REJECTION -==================================== - -Rather than instantiating a key, it is possible for the possessor of an -authorisation key to negatively instantiate a key that's under construction. -This is a short duration placeholder that causes any attempt at re-requesting -the key whilst it exists to fail with error ENOKEY if negated or the specified -error if rejected. - -This is provided to prevent excessive repeated spawning of /sbin/request-key -processes for a key that will never be obtainable. - -Should the /sbin/request-key process exit anything other than 0 or die on a -signal, the key under construction will be automatically negatively -instantiated for a short amount of time. - - -==================== -THE SEARCH ALGORITHM -==================== - -A search of any particular keyring proceeds in the following fashion: - - (1) When the key management code searches for a key (keyring_search_aux) it - firstly calls key_permission(SEARCH) on the keyring it's starting with, - if this denies permission, it doesn't search further. - - (2) It considers all the non-keyring keys within that keyring and, if any key - matches the criteria specified, calls key_permission(SEARCH) on it to see - if the key is allowed to be found. If it is, that key is returned; if - not, the search continues, and the error code is retained if of higher - priority than the one currently set. - - (3) It then considers all the keyring-type keys in the keyring it's currently - searching. It calls key_permission(SEARCH) on each keyring, and if this - grants permission, it recurses, executing steps (2) and (3) on that - keyring. - -The process stops immediately a valid key is found with permission granted to -use it. Any error from a previous match attempt is discarded and the key is -returned. - -When search_process_keyrings() is invoked, it performs the following searches -until one succeeds: - - (1) If extant, the process's thread keyring is searched. - - (2) If extant, the process's process keyring is searched. - - (3) The process's session keyring is searched. - - (4) If the process has assumed the authority associated with a request_key() - authorisation key then: - - (a) If extant, the calling process's thread keyring is searched. - - (b) If extant, the calling process's process keyring is searched. - - (c) The calling process's session keyring is searched. - -The moment one succeeds, all pending errors are discarded and the found key is -returned. - -Only if all these fail does the whole thing fail with the highest priority -error. Note that several errors may have come from LSM. - -The error priority is: - - EKEYREVOKED > EKEYEXPIRED > ENOKEY - -EACCES/EPERM are only returned on a direct search of a specific keyring where -the basal keyring does not grant Search permission. diff --git a/Documentation/keys.txt b/Documentation/keys.txt deleted file mode 100644 index 6523a9e6f293..000000000000 --- a/Documentation/keys.txt +++ /dev/null @@ -1,1290 +0,0 @@ - ============================ - KERNEL KEY RETENTION SERVICE - ============================ - -This service allows cryptographic keys, authentication tokens, cross-domain -user mappings, and similar to be cached in the kernel for the use of -filesystems and other kernel services. - -Keyrings are permitted; these are a special type of key that can hold links to -other keys. Processes each have three standard keyring subscriptions that a -kernel service can search for relevant keys. - -The key service can be configured on by enabling: - - "Security options"/"Enable access key retention support" (CONFIG_KEYS) - -This document has the following sections: - - - Key overview - - Key service overview - - Key access permissions - - SELinux support - - New procfs files - - Userspace system call interface - - Kernel services - - Notes on accessing payload contents - - Defining a key type - - Request-key callback service - - Garbage collection - - -============ -KEY OVERVIEW -============ - -In this context, keys represent units of cryptographic data, authentication -tokens, keyrings, etc.. These are represented in the kernel by struct key. - -Each key has a number of attributes: - - - A serial number. - - A type. - - A description (for matching a key in a search). - - Access control information. - - An expiry time. - - A payload. - - State. - - - (*) Each key is issued a serial number of type key_serial_t that is unique for - the lifetime of that key. All serial numbers are positive non-zero 32-bit - integers. - - Userspace programs can use a key's serial numbers as a way to gain access - to it, subject to permission checking. - - (*) Each key is of a defined "type". Types must be registered inside the - kernel by a kernel service (such as a filesystem) before keys of that type - can be added or used. Userspace programs cannot define new types directly. - - Key types are represented in the kernel by struct key_type. This defines a - number of operations that can be performed on a key of that type. - - Should a type be removed from the system, all the keys of that type will - be invalidated. - - (*) Each key has a description. This should be a printable string. The key - type provides an operation to perform a match between the description on a - key and a criterion string. - - (*) Each key has an owner user ID, a group ID and a permissions mask. These - are used to control what a process may do to a key from userspace, and - whether a kernel service will be able to find the key. - - (*) Each key can be set to expire at a specific time by the key type's - instantiation function. Keys can also be immortal. - - (*) Each key can have a payload. This is a quantity of data that represent the - actual "key". In the case of a keyring, this is a list of keys to which - the keyring links; in the case of a user-defined key, it's an arbitrary - blob of data. - - Having a payload is not required; and the payload can, in fact, just be a - value stored in the struct key itself. - - When a key is instantiated, the key type's instantiation function is - called with a blob of data, and that then creates the key's payload in - some way. - - Similarly, when userspace wants to read back the contents of the key, if - permitted, another key type operation will be called to convert the key's - attached payload back into a blob of data. - - (*) Each key can be in one of a number of basic states: - - (*) Uninstantiated. The key exists, but does not have any data attached. - Keys being requested from userspace will be in this state. - - (*) Instantiated. This is the normal state. The key is fully formed, and - has data attached. - - (*) Negative. This is a relatively short-lived state. The key acts as a - note saying that a previous call out to userspace failed, and acts as - a throttle on key lookups. A negative key can be updated to a normal - state. - - (*) Expired. Keys can have lifetimes set. If their lifetime is exceeded, - they traverse to this state. An expired key can be updated back to a - normal state. - - (*) Revoked. A key is put in this state by userspace action. It can't be - found or operated upon (apart from by unlinking it). - - (*) Dead. The key's type was unregistered, and so the key is now useless. - -Keys in the last three states are subject to garbage collection. See the -section on "Garbage collection". - - -==================== -KEY SERVICE OVERVIEW -==================== - -The key service provides a number of features besides keys: - - (*) The key service defines two special key types: - - (+) "keyring" - - Keyrings are special keys that contain a list of other keys. Keyring - lists can be modified using various system calls. Keyrings should not - be given a payload when created. - - (+) "user" - - A key of this type has a description and a payload that are arbitrary - blobs of data. These can be created, updated and read by userspace, - and aren't intended for use by kernel services. - - (*) Each process subscribes to three keyrings: a thread-specific keyring, a - process-specific keyring, and a session-specific keyring. - - The thread-specific keyring is discarded from the child when any sort of - clone, fork, vfork or execve occurs. A new keyring is created only when - required. - - The process-specific keyring is replaced with an empty one in the child on - clone, fork, vfork unless CLONE_THREAD is supplied, in which case it is - shared. execve also discards the process's process keyring and creates a - new one. - - The session-specific keyring is persistent across clone, fork, vfork and - execve, even when the latter executes a set-UID or set-GID binary. A - process can, however, replace its current session keyring with a new one - by using PR_JOIN_SESSION_KEYRING. It is permitted to request an anonymous - new one, or to attempt to create or join one of a specific name. - - The ownership of the thread keyring changes when the real UID and GID of - the thread changes. - - (*) Each user ID resident in the system holds two special keyrings: a user - specific keyring and a default user session keyring. The default session - keyring is initialised with a link to the user-specific keyring. - - When a process changes its real UID, if it used to have no session key, it - will be subscribed to the default session key for the new UID. - - If a process attempts to access its session key when it doesn't have one, - it will be subscribed to the default for its current UID. - - (*) Each user has two quotas against which the keys they own are tracked. One - limits the total number of keys and keyrings, the other limits the total - amount of description and payload space that can be consumed. - - The user can view information on this and other statistics through procfs - files. The root user may also alter the quota limits through sysctl files - (see the section "New procfs files"). - - Process-specific and thread-specific keyrings are not counted towards a - user's quota. - - If a system call that modifies a key or keyring in some way would put the - user over quota, the operation is refused and error EDQUOT is returned. - - (*) There's a system call interface by which userspace programs can create and - manipulate keys and keyrings. - - (*) There's a kernel interface by which services can register types and search - for keys. - - (*) There's a way for the a search done from the kernel to call back to - userspace to request a key that can't be found in a process's keyrings. - - (*) An optional filesystem is available through which the key database can be - viewed and manipulated. - - -====================== -KEY ACCESS PERMISSIONS -====================== - -Keys have an owner user ID, a group access ID, and a permissions mask. The mask -has up to eight bits each for possessor, user, group and other access. Only -six of each set of eight bits are defined. These permissions granted are: - - (*) View - - This permits a key or keyring's attributes to be viewed - including key - type and description. - - (*) Read - - This permits a key's payload to be viewed or a keyring's list of linked - keys. - - (*) Write - - This permits a key's payload to be instantiated or updated, or it allows a - link to be added to or removed from a keyring. - - (*) Search - - This permits keyrings to be searched and keys to be found. Searches can - only recurse into nested keyrings that have search permission set. - - (*) Link - - This permits a key or keyring to be linked to. To create a link from a - keyring to a key, a process must have Write permission on the keyring and - Link permission on the key. - - (*) Set Attribute - - This permits a key's UID, GID and permissions mask to be changed. - -For changing the ownership, group ID or permissions mask, being the owner of -the key or having the sysadmin capability is sufficient. - - -=============== -SELINUX SUPPORT -=============== - -The security class "key" has been added to SELinux so that mandatory access -controls can be applied to keys created within various contexts. This support -is preliminary, and is likely to change quite significantly in the near future. -Currently, all of the basic permissions explained above are provided in SELinux -as well; SELinux is simply invoked after all basic permission checks have been -performed. - -The value of the file /proc/self/attr/keycreate influences the labeling of -newly-created keys. If the contents of that file correspond to an SELinux -security context, then the key will be assigned that context. Otherwise, the -key will be assigned the current context of the task that invoked the key -creation request. Tasks must be granted explicit permission to assign a -particular context to newly-created keys, using the "create" permission in the -key security class. - -The default keyrings associated with users will be labeled with the default -context of the user if and only if the login programs have been instrumented to -properly initialize keycreate during the login process. Otherwise, they will -be labeled with the context of the login program itself. - -Note, however, that the default keyrings associated with the root user are -labeled with the default kernel context, since they are created early in the -boot process, before root has a chance to log in. - -The keyrings associated with new threads are each labeled with the context of -their associated thread, and both session and process keyrings are handled -similarly. - - -================ -NEW PROCFS FILES -================ - -Two files have been added to procfs by which an administrator can find out -about the status of the key service: - - (*) /proc/keys - - This lists the keys that are currently viewable by the task reading the - file, giving information about their type, description and permissions. - It is not possible to view the payload of the key this way, though some - information about it may be given. - - The only keys included in the list are those that grant View permission to - the reading process whether or not it possesses them. Note that LSM - security checks are still performed, and may further filter out keys that - the current process is not authorised to view. - - The contents of the file look like this: - - SERIAL FLAGS USAGE EXPY PERM UID GID TYPE DESCRIPTION: SUMMARY - 00000001 I----- 39 perm 1f3f0000 0 0 keyring _uid_ses.0: 1/4 - 00000002 I----- 2 perm 1f3f0000 0 0 keyring _uid.0: empty - 00000007 I----- 1 perm 1f3f0000 0 0 keyring _pid.1: empty - 0000018d I----- 1 perm 1f3f0000 0 0 keyring _pid.412: empty - 000004d2 I--Q-- 1 perm 1f3f0000 32 -1 keyring _uid.32: 1/4 - 000004d3 I--Q-- 3 perm 1f3f0000 32 -1 keyring _uid_ses.32: empty - 00000892 I--QU- 1 perm 1f000000 0 0 user metal:copper: 0 - 00000893 I--Q-N 1 35s 1f3f0000 0 0 user metal:silver: 0 - 00000894 I--Q-- 1 10h 003f0000 0 0 user metal:gold: 0 - - The flags are: - - I Instantiated - R Revoked - D Dead - Q Contributes to user's quota - U Under construction by callback to userspace - N Negative key - - This file must be enabled at kernel configuration time as it allows anyone - to list the keys database. - - (*) /proc/key-users - - This file lists the tracking data for each user that has at least one key - on the system. Such data includes quota information and statistics: - - [root@andromeda root]# cat /proc/key-users - 0: 46 45/45 1/100 13/10000 - 29: 2 2/2 2/100 40/10000 - 32: 2 2/2 2/100 40/10000 - 38: 2 2/2 2/100 40/10000 - - The format of each line is - : User ID to which this applies - Structure refcount - / Total number of keys and number instantiated - / Key count quota - / Key size quota - - -Four new sysctl files have been added also for the purpose of controlling the -quota limits on keys: - - (*) /proc/sys/kernel/keys/root_maxkeys - /proc/sys/kernel/keys/root_maxbytes - - These files hold the maximum number of keys that root may have and the - maximum total number of bytes of data that root may have stored in those - keys. - - (*) /proc/sys/kernel/keys/maxkeys - /proc/sys/kernel/keys/maxbytes - - These files hold the maximum number of keys that each non-root user may - have and the maximum total number of bytes of data that each of those - users may have stored in their keys. - -Root may alter these by writing each new limit as a decimal number string to -the appropriate file. - - -=============================== -USERSPACE SYSTEM CALL INTERFACE -=============================== - -Userspace can manipulate keys directly through three new syscalls: add_key, -request_key and keyctl. The latter provides a number of functions for -manipulating keys. - -When referring to a key directly, userspace programs should use the key's -serial number (a positive 32-bit integer). However, there are some special -values available for referring to special keys and keyrings that relate to the -process making the call: - - CONSTANT VALUE KEY REFERENCED - ============================== ====== =========================== - KEY_SPEC_THREAD_KEYRING -1 thread-specific keyring - KEY_SPEC_PROCESS_KEYRING -2 process-specific keyring - KEY_SPEC_SESSION_KEYRING -3 session-specific keyring - KEY_SPEC_USER_KEYRING -4 UID-specific keyring - KEY_SPEC_USER_SESSION_KEYRING -5 UID-session keyring - KEY_SPEC_GROUP_KEYRING -6 GID-specific keyring - KEY_SPEC_REQKEY_AUTH_KEY -7 assumed request_key() - authorisation key - - -The main syscalls are: - - (*) Create a new key of given type, description and payload and add it to the - nominated keyring: - - key_serial_t add_key(const char *type, const char *desc, - const void *payload, size_t plen, - key_serial_t keyring); - - If a key of the same type and description as that proposed already exists - in the keyring, this will try to update it with the given payload, or it - will return error EEXIST if that function is not supported by the key - type. The process must also have permission to write to the key to be able - to update it. The new key will have all user permissions granted and no - group or third party permissions. - - Otherwise, this will attempt to create a new key of the specified type and - description, and to instantiate it with the supplied payload and attach it - to the keyring. In this case, an error will be generated if the process - does not have permission to write to the keyring. - - The payload is optional, and the pointer can be NULL if not required by - the type. The payload is plen in size, and plen can be zero for an empty - payload. - - A new keyring can be generated by setting type "keyring", the keyring name - as the description (or NULL) and setting the payload to NULL. - - User defined keys can be created by specifying type "user". It is - recommended that a user defined key's description by prefixed with a type - ID and a colon, such as "krb5tgt:" for a Kerberos 5 ticket granting - ticket. - - Any other type must have been registered with the kernel in advance by a - kernel service such as a filesystem. - - The ID of the new or updated key is returned if successful. - - - (*) Search the process's keyrings for a key, potentially calling out to - userspace to create it. - - key_serial_t request_key(const char *type, const char *description, - const char *callout_info, - key_serial_t dest_keyring); - - This function searches all the process's keyrings in the order thread, - process, session for a matching key. This works very much like - KEYCTL_SEARCH, including the optional attachment of the discovered key to - a keyring. - - If a key cannot be found, and if callout_info is not NULL, then - /sbin/request-key will be invoked in an attempt to obtain a key. The - callout_info string will be passed as an argument to the program. - - See also Documentation/keys-request-key.txt. - - -The keyctl syscall functions are: - - (*) Map a special key ID to a real key ID for this process: - - key_serial_t keyctl(KEYCTL_GET_KEYRING_ID, key_serial_t id, - int create); - - The special key specified by "id" is looked up (with the key being created - if necessary) and the ID of the key or keyring thus found is returned if - it exists. - - If the key does not yet exist, the key will be created if "create" is - non-zero; and the error ENOKEY will be returned if "create" is zero. - - - (*) Replace the session keyring this process subscribes to with a new one: - - key_serial_t keyctl(KEYCTL_JOIN_SESSION_KEYRING, const char *name); - - If name is NULL, an anonymous keyring is created attached to the process - as its session keyring, displacing the old session keyring. - - If name is not NULL, if a keyring of that name exists, the process - attempts to attach it as the session keyring, returning an error if that - is not permitted; otherwise a new keyring of that name is created and - attached as the session keyring. - - To attach to a named keyring, the keyring must have search permission for - the process's ownership. - - The ID of the new session keyring is returned if successful. - - - (*) Update the specified key: - - long keyctl(KEYCTL_UPDATE, key_serial_t key, const void *payload, - size_t plen); - - This will try to update the specified key with the given payload, or it - will return error EOPNOTSUPP if that function is not supported by the key - type. The process must also have permission to write to the key to be able - to update it. - - The payload is of length plen, and may be absent or empty as for - add_key(). - - - (*) Revoke a key: - - long keyctl(KEYCTL_REVOKE, key_serial_t key); - - This makes a key unavailable for further operations. Further attempts to - use the key will be met with error EKEYREVOKED, and the key will no longer - be findable. - - - (*) Change the ownership of a key: - - long keyctl(KEYCTL_CHOWN, key_serial_t key, uid_t uid, gid_t gid); - - This function permits a key's owner and group ID to be changed. Either one - of uid or gid can be set to -1 to suppress that change. - - Only the superuser can change a key's owner to something other than the - key's current owner. Similarly, only the superuser can change a key's - group ID to something other than the calling process's group ID or one of - its group list members. - - - (*) Change the permissions mask on a key: - - long keyctl(KEYCTL_SETPERM, key_serial_t key, key_perm_t perm); - - This function permits the owner of a key or the superuser to change the - permissions mask on a key. - - Only bits the available bits are permitted; if any other bits are set, - error EINVAL will be returned. - - - (*) Describe a key: - - long keyctl(KEYCTL_DESCRIBE, key_serial_t key, char *buffer, - size_t buflen); - - This function returns a summary of the key's attributes (but not its - payload data) as a string in the buffer provided. - - Unless there's an error, it always returns the amount of data it could - produce, even if that's too big for the buffer, but it won't copy more - than requested to userspace. If the buffer pointer is NULL then no copy - will take place. - - A process must have view permission on the key for this function to be - successful. - - If successful, a string is placed in the buffer in the following format: - - ;;;; - - Where type and description are strings, uid and gid are decimal, and perm - is hexadecimal. A NUL character is included at the end of the string if - the buffer is sufficiently big. - - This can be parsed with - - sscanf(buffer, "%[^;];%d;%d;%o;%s", type, &uid, &gid, &mode, desc); - - - (*) Clear out a keyring: - - long keyctl(KEYCTL_CLEAR, key_serial_t keyring); - - This function clears the list of keys attached to a keyring. The calling - process must have write permission on the keyring, and it must be a - keyring (or else error ENOTDIR will result). - - - (*) Link a key into a keyring: - - long keyctl(KEYCTL_LINK, key_serial_t keyring, key_serial_t key); - - This function creates a link from the keyring to the key. The process must - have write permission on the keyring and must have link permission on the - key. - - Should the keyring not be a keyring, error ENOTDIR will result; and if the - keyring is full, error ENFILE will result. - - The link procedure checks the nesting of the keyrings, returning ELOOP if - it appears too deep or EDEADLK if the link would introduce a cycle. - - Any links within the keyring to keys that match the new key in terms of - type and description will be discarded from the keyring as the new one is - added. - - - (*) Unlink a key or keyring from another keyring: - - long keyctl(KEYCTL_UNLINK, key_serial_t keyring, key_serial_t key); - - This function looks through the keyring for the first link to the - specified key, and removes it if found. Subsequent links to that key are - ignored. The process must have write permission on the keyring. - - If the keyring is not a keyring, error ENOTDIR will result; and if the key - is not present, error ENOENT will be the result. - - - (*) Search a keyring tree for a key: - - key_serial_t keyctl(KEYCTL_SEARCH, key_serial_t keyring, - const char *type, const char *description, - key_serial_t dest_keyring); - - This searches the keyring tree headed by the specified keyring until a key - is found that matches the type and description criteria. Each keyring is - checked for keys before recursion into its children occurs. - - The process must have search permission on the top level keyring, or else - error EACCES will result. Only keyrings that the process has search - permission on will be recursed into, and only keys and keyrings for which - a process has search permission can be matched. If the specified keyring - is not a keyring, ENOTDIR will result. - - If the search succeeds, the function will attempt to link the found key - into the destination keyring if one is supplied (non-zero ID). All the - constraints applicable to KEYCTL_LINK apply in this case too. - - Error ENOKEY, EKEYREVOKED or EKEYEXPIRED will be returned if the search - fails. On success, the resulting key ID will be returned. - - - (*) Read the payload data from a key: - - long keyctl(KEYCTL_READ, key_serial_t keyring, char *buffer, - size_t buflen); - - This function attempts to read the payload data from the specified key - into the buffer. The process must have read permission on the key to - succeed. - - The returned data will be processed for presentation by the key type. For - instance, a keyring will return an array of key_serial_t entries - representing the IDs of all the keys to which it is subscribed. The user - defined key type will return its data as is. If a key type does not - implement this function, error EOPNOTSUPP will result. - - As much of the data as can be fitted into the buffer will be copied to - userspace if the buffer pointer is not NULL. - - On a successful return, the function will always return the amount of data - available rather than the amount copied. - - - (*) Instantiate a partially constructed key. - - long keyctl(KEYCTL_INSTANTIATE, key_serial_t key, - const void *payload, size_t plen, - key_serial_t keyring); - long keyctl(KEYCTL_INSTANTIATE_IOV, key_serial_t key, - const struct iovec *payload_iov, unsigned ioc, - key_serial_t keyring); - - If the kernel calls back to userspace to complete the instantiation of a - key, userspace should use this call to supply data for the key before the - invoked process returns, or else the key will be marked negative - automatically. - - The process must have write access on the key to be able to instantiate - it, and the key must be uninstantiated. - - If a keyring is specified (non-zero), the key will also be linked into - that keyring, however all the constraints applying in KEYCTL_LINK apply in - this case too. - - The payload and plen arguments describe the payload data as for add_key(). - - The payload_iov and ioc arguments describe the payload data in an iovec - array instead of a single buffer. - - - (*) Negatively instantiate a partially constructed key. - - long keyctl(KEYCTL_NEGATE, key_serial_t key, - unsigned timeout, key_serial_t keyring); - long keyctl(KEYCTL_REJECT, key_serial_t key, - unsigned timeout, unsigned error, key_serial_t keyring); - - If the kernel calls back to userspace to complete the instantiation of a - key, userspace should use this call mark the key as negative before the - invoked process returns if it is unable to fulfil the request. - - The process must have write access on the key to be able to instantiate - it, and the key must be uninstantiated. - - If a keyring is specified (non-zero), the key will also be linked into - that keyring, however all the constraints applying in KEYCTL_LINK apply in - this case too. - - If the key is rejected, future searches for it will return the specified - error code until the rejected key expires. Negating the key is the same - as rejecting the key with ENOKEY as the error code. - - - (*) Set the default request-key destination keyring. - - long keyctl(KEYCTL_SET_REQKEY_KEYRING, int reqkey_defl); - - This sets the default keyring to which implicitly requested keys will be - attached for this thread. reqkey_defl should be one of these constants: - - CONSTANT VALUE NEW DEFAULT KEYRING - ====================================== ====== ======================= - KEY_REQKEY_DEFL_NO_CHANGE -1 No change - KEY_REQKEY_DEFL_DEFAULT 0 Default[1] - KEY_REQKEY_DEFL_THREAD_KEYRING 1 Thread keyring - KEY_REQKEY_DEFL_PROCESS_KEYRING 2 Process keyring - KEY_REQKEY_DEFL_SESSION_KEYRING 3 Session keyring - KEY_REQKEY_DEFL_USER_KEYRING 4 User keyring - KEY_REQKEY_DEFL_USER_SESSION_KEYRING 5 User session keyring - KEY_REQKEY_DEFL_GROUP_KEYRING 6 Group keyring - - The old default will be returned if successful and error EINVAL will be - returned if reqkey_defl is not one of the above values. - - The default keyring can be overridden by the keyring indicated to the - request_key() system call. - - Note that this setting is inherited across fork/exec. - - [1] The default is: the thread keyring if there is one, otherwise - the process keyring if there is one, otherwise the session keyring if - there is one, otherwise the user default session keyring. - - - (*) Set the timeout on a key. - - long keyctl(KEYCTL_SET_TIMEOUT, key_serial_t key, unsigned timeout); - - This sets or clears the timeout on a key. The timeout can be 0 to clear - the timeout or a number of seconds to set the expiry time that far into - the future. - - The process must have attribute modification access on a key to set its - timeout. Timeouts may not be set with this function on negative, revoked - or expired keys. - - - (*) Assume the authority granted to instantiate a key - - long keyctl(KEYCTL_ASSUME_AUTHORITY, key_serial_t key); - - This assumes or divests the authority required to instantiate the - specified key. Authority can only be assumed if the thread has the - authorisation key associated with the specified key in its keyrings - somewhere. - - Once authority is assumed, searches for keys will also search the - requester's keyrings using the requester's security label, UID, GID and - groups. - - If the requested authority is unavailable, error EPERM will be returned, - likewise if the authority has been revoked because the target key is - already instantiated. - - If the specified key is 0, then any assumed authority will be divested. - - The assumed authoritative key is inherited across fork and exec. - - - (*) Get the LSM security context attached to a key. - - long keyctl(KEYCTL_GET_SECURITY, key_serial_t key, char *buffer, - size_t buflen) - - This function returns a string that represents the LSM security context - attached to a key in the buffer provided. - - Unless there's an error, it always returns the amount of data it could - produce, even if that's too big for the buffer, but it won't copy more - than requested to userspace. If the buffer pointer is NULL then no copy - will take place. - - A NUL character is included at the end of the string if the buffer is - sufficiently big. This is included in the returned count. If no LSM is - in force then an empty string will be returned. - - A process must have view permission on the key for this function to be - successful. - - - (*) Install the calling process's session keyring on its parent. - - long keyctl(KEYCTL_SESSION_TO_PARENT); - - This functions attempts to install the calling process's session keyring - on to the calling process's parent, replacing the parent's current session - keyring. - - The calling process must have the same ownership as its parent, the - keyring must have the same ownership as the calling process, the calling - process must have LINK permission on the keyring and the active LSM module - mustn't deny permission, otherwise error EPERM will be returned. - - Error ENOMEM will be returned if there was insufficient memory to complete - the operation, otherwise 0 will be returned to indicate success. - - The keyring will be replaced next time the parent process leaves the - kernel and resumes executing userspace. - - -=============== -KERNEL SERVICES -=============== - -The kernel services for key management are fairly simple to deal with. They can -be broken down into two areas: keys and key types. - -Dealing with keys is fairly straightforward. Firstly, the kernel service -registers its type, then it searches for a key of that type. It should retain -the key as long as it has need of it, and then it should release it. For a -filesystem or device file, a search would probably be performed during the open -call, and the key released upon close. How to deal with conflicting keys due to -two different users opening the same file is left to the filesystem author to -solve. - -To access the key manager, the following header must be #included: - - - -Specific key types should have a header file under include/keys/ that should be -used to access that type. For keys of type "user", for example, that would be: - - - -Note that there are two different types of pointers to keys that may be -encountered: - - (*) struct key * - - This simply points to the key structure itself. Key structures will be at - least four-byte aligned. - - (*) key_ref_t - - This is equivalent to a struct key *, but the least significant bit is set - if the caller "possesses" the key. By "possession" it is meant that the - calling processes has a searchable link to the key from one of its - keyrings. There are three functions for dealing with these: - - key_ref_t make_key_ref(const struct key *key, - unsigned long possession); - - struct key *key_ref_to_ptr(const key_ref_t key_ref); - - unsigned long is_key_possessed(const key_ref_t key_ref); - - The first function constructs a key reference from a key pointer and - possession information (which must be 0 or 1 and not any other value). - - The second function retrieves the key pointer from a reference and the - third retrieves the possession flag. - -When accessing a key's payload contents, certain precautions must be taken to -prevent access vs modification races. See the section "Notes on accessing -payload contents" for more information. - -(*) To search for a key, call: - - struct key *request_key(const struct key_type *type, - const char *description, - const char *callout_info); - - This is used to request a key or keyring with a description that matches - the description specified according to the key type's match function. This - permits approximate matching to occur. If callout_string is not NULL, then - /sbin/request-key will be invoked in an attempt to obtain the key from - userspace. In that case, callout_string will be passed as an argument to - the program. - - Should the function fail error ENOKEY, EKEYEXPIRED or EKEYREVOKED will be - returned. - - If successful, the key will have been attached to the default keyring for - implicitly obtained request-key keys, as set by KEYCTL_SET_REQKEY_KEYRING. - - See also Documentation/keys-request-key.txt. - - -(*) To search for a key, passing auxiliary data to the upcaller, call: - - struct key *request_key_with_auxdata(const struct key_type *type, - const char *description, - const void *callout_info, - size_t callout_len, - void *aux); - - This is identical to request_key(), except that the auxiliary data is - passed to the key_type->request_key() op if it exists, and the callout_info - is a blob of length callout_len, if given (the length may be 0). - - -(*) A key can be requested asynchronously by calling one of: - - struct key *request_key_async(const struct key_type *type, - const char *description, - const void *callout_info, - size_t callout_len); - - or: - - struct key *request_key_async_with_auxdata(const struct key_type *type, - const char *description, - const char *callout_info, - size_t callout_len, - void *aux); - - which are asynchronous equivalents of request_key() and - request_key_with_auxdata() respectively. - - These two functions return with the key potentially still under - construction. To wait for construction completion, the following should be - called: - - int wait_for_key_construction(struct key *key, bool intr); - - The function will wait for the key to finish being constructed and then - invokes key_validate() to return an appropriate value to indicate the state - of the key (0 indicates the key is usable). - - If intr is true, then the wait can be interrupted by a signal, in which - case error ERESTARTSYS will be returned. - - -(*) When it is no longer required, the key should be released using: - - void key_put(struct key *key); - - Or: - - void key_ref_put(key_ref_t key_ref); - - These can be called from interrupt context. If CONFIG_KEYS is not set then - the argument will not be parsed. - - -(*) Extra references can be made to a key by calling the following function: - - struct key *key_get(struct key *key); - - These need to be disposed of by calling key_put() when they've been - finished with. The key pointer passed in will be returned. If the pointer - is NULL or CONFIG_KEYS is not set then the key will not be dereferenced and - no increment will take place. - - -(*) A key's serial number can be obtained by calling: - - key_serial_t key_serial(struct key *key); - - If key is NULL or if CONFIG_KEYS is not set then 0 will be returned (in the - latter case without parsing the argument). - - -(*) If a keyring was found in the search, this can be further searched by: - - key_ref_t keyring_search(key_ref_t keyring_ref, - const struct key_type *type, - const char *description) - - This searches the keyring tree specified for a matching key. Error ENOKEY - is returned upon failure (use IS_ERR/PTR_ERR to determine). If successful, - the returned key will need to be released. - - The possession attribute from the keyring reference is used to control - access through the permissions mask and is propagated to the returned key - reference pointer if successful. - - -(*) To check the validity of a key, this function can be called: - - int validate_key(struct key *key); - - This checks that the key in question hasn't expired or and hasn't been - revoked. Should the key be invalid, error EKEYEXPIRED or EKEYREVOKED will - be returned. If the key is NULL or if CONFIG_KEYS is not set then 0 will be - returned (in the latter case without parsing the argument). - - -(*) To register a key type, the following function should be called: - - int register_key_type(struct key_type *type); - - This will return error EEXIST if a type of the same name is already - present. - - -(*) To unregister a key type, call: - - void unregister_key_type(struct key_type *type); - - -Under some circumstances, it may be desirable to deal with a bundle of keys. -The facility provides access to the keyring type for managing such a bundle: - - struct key_type key_type_keyring; - -This can be used with a function such as request_key() to find a specific -keyring in a process's keyrings. A keyring thus found can then be searched -with keyring_search(). Note that it is not possible to use request_key() to -search a specific keyring, so using keyrings in this way is of limited utility. - - -=================================== -NOTES ON ACCESSING PAYLOAD CONTENTS -=================================== - -The simplest payload is just a number in key->payload.value. In this case, -there's no need to indulge in RCU or locking when accessing the payload. - -More complex payload contents must be allocated and a pointer to them set in -key->payload.data. One of the following ways must be selected to access the -data: - - (1) Unmodifiable key type. - - If the key type does not have a modify method, then the key's payload can - be accessed without any form of locking, provided that it's known to be - instantiated (uninstantiated keys cannot be "found"). - - (2) The key's semaphore. - - The semaphore could be used to govern access to the payload and to control - the payload pointer. It must be write-locked for modifications and would - have to be read-locked for general access. The disadvantage of doing this - is that the accessor may be required to sleep. - - (3) RCU. - - RCU must be used when the semaphore isn't already held; if the semaphore - is held then the contents can't change under you unexpectedly as the - semaphore must still be used to serialise modifications to the key. The - key management code takes care of this for the key type. - - However, this means using: - - rcu_read_lock() ... rcu_dereference() ... rcu_read_unlock() - - to read the pointer, and: - - rcu_dereference() ... rcu_assign_pointer() ... call_rcu() - - to set the pointer and dispose of the old contents after a grace period. - Note that only the key type should ever modify a key's payload. - - Furthermore, an RCU controlled payload must hold a struct rcu_head for the - use of call_rcu() and, if the payload is of variable size, the length of - the payload. key->datalen cannot be relied upon to be consistent with the - payload just dereferenced if the key's semaphore is not held. - - -=================== -DEFINING A KEY TYPE -=================== - -A kernel service may want to define its own key type. For instance, an AFS -filesystem might want to define a Kerberos 5 ticket key type. To do this, it -author fills in a key_type struct and registers it with the system. - -Source files that implement key types should include the following header file: - - - -The structure has a number of fields, some of which are mandatory: - - (*) const char *name - - The name of the key type. This is used to translate a key type name - supplied by userspace into a pointer to the structure. - - - (*) size_t def_datalen - - This is optional - it supplies the default payload data length as - contributed to the quota. If the key type's payload is always or almost - always the same size, then this is a more efficient way to do things. - - The data length (and quota) on a particular key can always be changed - during instantiation or update by calling: - - int key_payload_reserve(struct key *key, size_t datalen); - - With the revised data length. Error EDQUOT will be returned if this is not - viable. - - - (*) int (*vet_description)(const char *description); - - This optional method is called to vet a key description. If the key type - doesn't approve of the key description, it may return an error, otherwise - it should return 0. - - - (*) int (*instantiate)(struct key *key, const void *data, size_t datalen); - - This method is called to attach a payload to a key during construction. - The payload attached need not bear any relation to the data passed to this - function. - - If the amount of data attached to the key differs from the size in - keytype->def_datalen, then key_payload_reserve() should be called. - - This method does not have to lock the key in order to attach a payload. - The fact that KEY_FLAG_INSTANTIATED is not set in key->flags prevents - anything else from gaining access to the key. - - It is safe to sleep in this method. - - - (*) int (*update)(struct key *key, const void *data, size_t datalen); - - If this type of key can be updated, then this method should be provided. - It is called to update a key's payload from the blob of data provided. - - key_payload_reserve() should be called if the data length might change - before any changes are actually made. Note that if this succeeds, the type - is committed to changing the key because it's already been altered, so all - memory allocation must be done first. - - The key will have its semaphore write-locked before this method is called, - but this only deters other writers; any changes to the key's payload must - be made under RCU conditions, and call_rcu() must be used to dispose of - the old payload. - - key_payload_reserve() should be called before the changes are made, but - after all allocations and other potentially failing function calls are - made. - - It is safe to sleep in this method. - - - (*) int (*match)(const struct key *key, const void *desc); - - This method is called to match a key against a description. It should - return non-zero if the two match, zero if they don't. - - This method should not need to lock the key in any way. The type and - description can be considered invariant, and the payload should not be - accessed (the key may not yet be instantiated). - - It is not safe to sleep in this method; the caller may hold spinlocks. - - - (*) void (*revoke)(struct key *key); - - This method is optional. It is called to discard part of the payload - data upon a key being revoked. The caller will have the key semaphore - write-locked. - - It is safe to sleep in this method, though care should be taken to avoid - a deadlock against the key semaphore. - - - (*) void (*destroy)(struct key *key); - - This method is optional. It is called to discard the payload data on a key - when it is being destroyed. - - This method does not need to lock the key to access the payload; it can - consider the key as being inaccessible at this time. Note that the key's - type may have been changed before this function is called. - - It is not safe to sleep in this method; the caller may hold spinlocks. - - - (*) void (*describe)(const struct key *key, struct seq_file *p); - - This method is optional. It is called during /proc/keys reading to - summarise a key's description and payload in text form. - - This method will be called with the RCU read lock held. rcu_dereference() - should be used to read the payload pointer if the payload is to be - accessed. key->datalen cannot be trusted to stay consistent with the - contents of the payload. - - The description will not change, though the key's state may. - - It is not safe to sleep in this method; the RCU read lock is held by the - caller. - - - (*) long (*read)(const struct key *key, char __user *buffer, size_t buflen); - - This method is optional. It is called by KEYCTL_READ to translate the - key's payload into something a blob of data for userspace to deal with. - Ideally, the blob should be in the same format as that passed in to the - instantiate and update methods. - - If successful, the blob size that could be produced should be returned - rather than the size copied. - - This method will be called with the key's semaphore read-locked. This will - prevent the key's payload changing. It is not necessary to use RCU locking - when accessing the key's payload. It is safe to sleep in this method, such - as might happen when the userspace buffer is accessed. - - - (*) int (*request_key)(struct key_construction *cons, const char *op, - void *aux); - - This method is optional. If provided, request_key() and friends will - invoke this function rather than upcalling to /sbin/request-key to operate - upon a key of this type. - - The aux parameter is as passed to request_key_async_with_auxdata() and - similar or is NULL otherwise. Also passed are the construction record for - the key to be operated upon and the operation type (currently only - "create"). - - This method is permitted to return before the upcall is complete, but the - following function must be called under all circumstances to complete the - instantiation process, whether or not it succeeds, whether or not there's - an error: - - void complete_request_key(struct key_construction *cons, int error); - - The error parameter should be 0 on success, -ve on error. The - construction record is destroyed by this action and the authorisation key - will be revoked. If an error is indicated, the key under construction - will be negatively instantiated if it wasn't already instantiated. - - If this method returns an error, that error will be returned to the - caller of request_key*(). complete_request_key() must be called prior to - returning. - - The key under construction and the authorisation key can be found in the - key_construction struct pointed to by cons: - - (*) struct key *key; - - The key under construction. - - (*) struct key *authkey; - - The authorisation key. - - -============================ -REQUEST-KEY CALLBACK SERVICE -============================ - -To create a new key, the kernel will attempt to execute the following command -line: - - /sbin/request-key create \ - - - is the key being constructed, and the three keyrings are the process -keyrings from the process that caused the search to be issued. These are -included for two reasons: - - (1) There may be an authentication token in one of the keyrings that is - required to obtain the key, eg: a Kerberos Ticket-Granting Ticket. - - (2) The new key should probably be cached in one of these rings. - -This program should set it UID and GID to those specified before attempting to -access any more keys. It may then look around for a user specific process to -hand the request off to (perhaps a path held in placed in another key by, for -example, the KDE desktop manager). - -The program (or whatever it calls) should finish construction of the key by -calling KEYCTL_INSTANTIATE or KEYCTL_INSTANTIATE_IOV, which also permits it to -cache the key in one of the keyrings (probably the session ring) before -returning. Alternatively, the key can be marked as negative with KEYCTL_NEGATE -or KEYCTL_REJECT; this also permits the key to be cached in one of the -keyrings. - -If it returns with the key remaining in the unconstructed state, the key will -be marked as being negative, it will be added to the session keyring, and an -error will be returned to the key requestor. - -Supplementary information may be provided from whoever or whatever invoked this -service. This will be passed as the parameter. If no such -information was made available, then "-" will be passed as this parameter -instead. - - -Similarly, the kernel may attempt to update an expired or a soon to expire key -by executing: - - /sbin/request-key update \ - - -In this case, the program isn't required to actually attach the key to a ring; -the rings are provided for reference. - - -================== -GARBAGE COLLECTION -================== - -Dead keys (for which the type has been removed) will be automatically unlinked -from those keyrings that point to them and deleted as soon as possible by a -background garbage collector. - -Similarly, revoked and expired keys will be garbage collected, but only after a -certain amount of time has passed. This time is set as a number of seconds in: - - /proc/sys/kernel/keys/gc_delay diff --git a/Documentation/kmemleak.txt b/Documentation/kmemleak.txt index 090e6ee04536..51063e681ca4 100644 --- a/Documentation/kmemleak.txt +++ b/Documentation/kmemleak.txt @@ -11,7 +11,9 @@ with the difference that the orphan objects are not freed but only reported via /sys/kernel/debug/kmemleak. A similar method is used by the Valgrind tool (memcheck --leak-check) to detect the memory leaks in user-space applications. -Kmemleak is supported on x86, arm, powerpc, sparc, sh, microblaze and tile. + +Please check DEBUG_KMEMLEAK dependencies in lib/Kconfig.debug for supported +architectures. Usage ----- diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt deleted file mode 100644 index 9bef4e4cec50..000000000000 --- a/Documentation/kvm/api.txt +++ /dev/null @@ -1,1451 +0,0 @@ -The Definitive KVM (Kernel-based Virtual Machine) API Documentation -=================================================================== - -1. General description - -The kvm API is a set of ioctls that are issued to control various aspects -of a virtual machine. The ioctls belong to three classes - - - System ioctls: These query and set global attributes which affect the - whole kvm subsystem. In addition a system ioctl is used to create - virtual machines - - - VM ioctls: These query and set attributes that affect an entire virtual - machine, for example memory layout. In addition a VM ioctl is used to - create virtual cpus (vcpus). - - Only run VM ioctls from the same process (address space) that was used - to create the VM. - - - vcpu ioctls: These query and set attributes that control the operation - of a single virtual cpu. - - Only run vcpu ioctls from the same thread that was used to create the - vcpu. - -2. File descriptors - -The kvm API is centered around file descriptors. An initial -open("/dev/kvm") obtains a handle to the kvm subsystem; this handle -can be used to issue system ioctls. A KVM_CREATE_VM ioctl on this -handle will create a VM file descriptor which can be used to issue VM -ioctls. A KVM_CREATE_VCPU ioctl on a VM fd will create a virtual cpu -and return a file descriptor pointing to it. Finally, ioctls on a vcpu -fd can be used to control the vcpu, including the important task of -actually running guest code. - -In general file descriptors can be migrated among processes by means -of fork() and the SCM_RIGHTS facility of unix domain socket. These -kinds of tricks are explicitly not supported by kvm. While they will -not cause harm to the host, their actual behavior is not guaranteed by -the API. The only supported use is one virtual machine per process, -and one vcpu per thread. - -3. Extensions - -As of Linux 2.6.22, the KVM ABI has been stabilized: no backward -incompatible change are allowed. However, there is an extension -facility that allows backward-compatible extensions to the API to be -queried and used. - -The extension mechanism is not based on on the Linux version number. -Instead, kvm defines extension identifiers and a facility to query -whether a particular extension identifier is available. If it is, a -set of ioctls is available for application use. - -4. API description - -This section describes ioctls that can be used to control kvm guests. -For each ioctl, the following information is provided along with a -description: - - Capability: which KVM extension provides this ioctl. Can be 'basic', - which means that is will be provided by any kernel that supports - API version 12 (see section 4.1), or a KVM_CAP_xyz constant, which - means availability needs to be checked with KVM_CHECK_EXTENSION - (see section 4.4). - - Architectures: which instruction set architectures provide this ioctl. - x86 includes both i386 and x86_64. - - Type: system, vm, or vcpu. - - Parameters: what parameters are accepted by the ioctl. - - Returns: the return value. General error numbers (EBADF, ENOMEM, EINVAL) - are not detailed, but errors with specific meanings are. - -4.1 KVM_GET_API_VERSION - -Capability: basic -Architectures: all -Type: system ioctl -Parameters: none -Returns: the constant KVM_API_VERSION (=12) - -This identifies the API version as the stable kvm API. It is not -expected that this number will change. However, Linux 2.6.20 and -2.6.21 report earlier versions; these are not documented and not -supported. Applications should refuse to run if KVM_GET_API_VERSION -returns a value other than 12. If this check passes, all ioctls -described as 'basic' will be available. - -4.2 KVM_CREATE_VM - -Capability: basic -Architectures: all -Type: system ioctl -Parameters: none -Returns: a VM fd that can be used to control the new virtual machine. - -The new VM has no virtual cpus and no memory. An mmap() of a VM fd -will access the virtual machine's physical address space; offset zero -corresponds to guest physical address zero. Use of mmap() on a VM fd -is discouraged if userspace memory allocation (KVM_CAP_USER_MEMORY) is -available. - -4.3 KVM_GET_MSR_INDEX_LIST - -Capability: basic -Architectures: x86 -Type: system -Parameters: struct kvm_msr_list (in/out) -Returns: 0 on success; -1 on error -Errors: - E2BIG: the msr index list is to be to fit in the array specified by - the user. - -struct kvm_msr_list { - __u32 nmsrs; /* number of msrs in entries */ - __u32 indices[0]; -}; - -This ioctl returns the guest msrs that are supported. The list varies -by kvm version and host processor, but does not change otherwise. The -user fills in the size of the indices array in nmsrs, and in return -kvm adjusts nmsrs to reflect the actual number of msrs and fills in -the indices array with their numbers. - -Note: if kvm indicates supports MCE (KVM_CAP_MCE), then the MCE bank MSRs are -not returned in the MSR list, as different vcpus can have a different number -of banks, as set via the KVM_X86_SETUP_MCE ioctl. - -4.4 KVM_CHECK_EXTENSION - -Capability: basic -Architectures: all -Type: system ioctl -Parameters: extension identifier (KVM_CAP_*) -Returns: 0 if unsupported; 1 (or some other positive integer) if supported - -The API allows the application to query about extensions to the core -kvm API. Userspace passes an extension identifier (an integer) and -receives an integer that describes the extension availability. -Generally 0 means no and 1 means yes, but some extensions may report -additional information in the integer return value. - -4.5 KVM_GET_VCPU_MMAP_SIZE - -Capability: basic -Architectures: all -Type: system ioctl -Parameters: none -Returns: size of vcpu mmap area, in bytes - -The KVM_RUN ioctl (cf.) communicates with userspace via a shared -memory region. This ioctl returns the size of that region. See the -KVM_RUN documentation for details. - -4.6 KVM_SET_MEMORY_REGION - -Capability: basic -Architectures: all -Type: vm ioctl -Parameters: struct kvm_memory_region (in) -Returns: 0 on success, -1 on error - -This ioctl is obsolete and has been removed. - -4.7 KVM_CREATE_VCPU - -Capability: basic -Architectures: all -Type: vm ioctl -Parameters: vcpu id (apic id on x86) -Returns: vcpu fd on success, -1 on error - -This API adds a vcpu to a virtual machine. The vcpu id is a small integer -in the range [0, max_vcpus). - -4.8 KVM_GET_DIRTY_LOG (vm ioctl) - -Capability: basic -Architectures: x86 -Type: vm ioctl -Parameters: struct kvm_dirty_log (in/out) -Returns: 0 on success, -1 on error - -/* for KVM_GET_DIRTY_LOG */ -struct kvm_dirty_log { - __u32 slot; - __u32 padding; - union { - void __user *dirty_bitmap; /* one bit per page */ - __u64 padding; - }; -}; - -Given a memory slot, return a bitmap containing any pages dirtied -since the last call to this ioctl. Bit 0 is the first page in the -memory slot. Ensure the entire structure is cleared to avoid padding -issues. - -4.9 KVM_SET_MEMORY_ALIAS - -Capability: basic -Architectures: x86 -Type: vm ioctl -Parameters: struct kvm_memory_alias (in) -Returns: 0 (success), -1 (error) - -This ioctl is obsolete and has been removed. - -4.10 KVM_RUN - -Capability: basic -Architectures: all -Type: vcpu ioctl -Parameters: none -Returns: 0 on success, -1 on error -Errors: - EINTR: an unmasked signal is pending - -This ioctl is used to run a guest virtual cpu. While there are no -explicit parameters, there is an implicit parameter block that can be -obtained by mmap()ing the vcpu fd at offset 0, with the size given by -KVM_GET_VCPU_MMAP_SIZE. The parameter block is formatted as a 'struct -kvm_run' (see below). - -4.11 KVM_GET_REGS - -Capability: basic -Architectures: all -Type: vcpu ioctl -Parameters: struct kvm_regs (out) -Returns: 0 on success, -1 on error - -Reads the general purpose registers from the vcpu. - -/* x86 */ -struct kvm_regs { - /* out (KVM_GET_REGS) / in (KVM_SET_REGS) */ - __u64 rax, rbx, rcx, rdx; - __u64 rsi, rdi, rsp, rbp; - __u64 r8, r9, r10, r11; - __u64 r12, r13, r14, r15; - __u64 rip, rflags; -}; - -4.12 KVM_SET_REGS - -Capability: basic -Architectures: all -Type: vcpu ioctl -Parameters: struct kvm_regs (in) -Returns: 0 on success, -1 on error - -Writes the general purpose registers into the vcpu. - -See KVM_GET_REGS for the data structure. - -4.13 KVM_GET_SREGS - -Capability: basic -Architectures: x86 -Type: vcpu ioctl -Parameters: struct kvm_sregs (out) -Returns: 0 on success, -1 on error - -Reads special registers from the vcpu. - -/* x86 */ -struct kvm_sregs { - struct kvm_segment cs, ds, es, fs, gs, ss; - struct kvm_segment tr, ldt; - struct kvm_dtable gdt, idt; - __u64 cr0, cr2, cr3, cr4, cr8; - __u64 efer; - __u64 apic_base; - __u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64]; -}; - -interrupt_bitmap is a bitmap of pending external interrupts. At most -one bit may be set. This interrupt has been acknowledged by the APIC -but not yet injected into the cpu core. - -4.14 KVM_SET_SREGS - -Capability: basic -Architectures: x86 -Type: vcpu ioctl -Parameters: struct kvm_sregs (in) -Returns: 0 on success, -1 on error - -Writes special registers into the vcpu. See KVM_GET_SREGS for the -data structures. - -4.15 KVM_TRANSLATE - -Capability: basic -Architectures: x86 -Type: vcpu ioctl -Parameters: struct kvm_translation (in/out) -Returns: 0 on success, -1 on error - -Translates a virtual address according to the vcpu's current address -translation mode. - -struct kvm_translation { - /* in */ - __u64 linear_address; - - /* out */ - __u64 physical_address; - __u8 valid; - __u8 writeable; - __u8 usermode; - __u8 pad[5]; -}; - -4.16 KVM_INTERRUPT - -Capability: basic -Architectures: x86, ppc -Type: vcpu ioctl -Parameters: struct kvm_interrupt (in) -Returns: 0 on success, -1 on error - -Queues a hardware interrupt vector to be injected. This is only -useful if in-kernel local APIC or equivalent is not used. - -/* for KVM_INTERRUPT */ -struct kvm_interrupt { - /* in */ - __u32 irq; -}; - -X86: - -Note 'irq' is an interrupt vector, not an interrupt pin or line. - -PPC: - -Queues an external interrupt to be injected. This ioctl is overleaded -with 3 different irq values: - -a) KVM_INTERRUPT_SET - - This injects an edge type external interrupt into the guest once it's ready - to receive interrupts. When injected, the interrupt is done. - -b) KVM_INTERRUPT_UNSET - - This unsets any pending interrupt. - - Only available with KVM_CAP_PPC_UNSET_IRQ. - -c) KVM_INTERRUPT_SET_LEVEL - - This injects a level type external interrupt into the guest context. The - interrupt stays pending until a specific ioctl with KVM_INTERRUPT_UNSET - is triggered. - - Only available with KVM_CAP_PPC_IRQ_LEVEL. - -Note that any value for 'irq' other than the ones stated above is invalid -and incurs unexpected behavior. - -4.17 KVM_DEBUG_GUEST - -Capability: basic -Architectures: none -Type: vcpu ioctl -Parameters: none) -Returns: -1 on error - -Support for this has been removed. Use KVM_SET_GUEST_DEBUG instead. - -4.18 KVM_GET_MSRS - -Capability: basic -Architectures: x86 -Type: vcpu ioctl -Parameters: struct kvm_msrs (in/out) -Returns: 0 on success, -1 on error - -Reads model-specific registers from the vcpu. Supported msr indices can -be obtained using KVM_GET_MSR_INDEX_LIST. - -struct kvm_msrs { - __u32 nmsrs; /* number of msrs in entries */ - __u32 pad; - - struct kvm_msr_entry entries[0]; -}; - -struct kvm_msr_entry { - __u32 index; - __u32 reserved; - __u64 data; -}; - -Application code should set the 'nmsrs' member (which indicates the -size of the entries array) and the 'index' member of each array entry. -kvm will fill in the 'data' member. - -4.19 KVM_SET_MSRS - -Capability: basic -Architectures: x86 -Type: vcpu ioctl -Parameters: struct kvm_msrs (in) -Returns: 0 on success, -1 on error - -Writes model-specific registers to the vcpu. See KVM_GET_MSRS for the -data structures. - -Application code should set the 'nmsrs' member (which indicates the -size of the entries array), and the 'index' and 'data' members of each -array entry. - -4.20 KVM_SET_CPUID - -Capability: basic -Architectures: x86 -Type: vcpu ioctl -Parameters: struct kvm_cpuid (in) -Returns: 0 on success, -1 on error - -Defines the vcpu responses to the cpuid instruction. Applications -should use the KVM_SET_CPUID2 ioctl if available. - - -struct kvm_cpuid_entry { - __u32 function; - __u32 eax; - __u32 ebx; - __u32 ecx; - __u32 edx; - __u32 padding; -}; - -/* for KVM_SET_CPUID */ -struct kvm_cpuid { - __u32 nent; - __u32 padding; - struct kvm_cpuid_entry entries[0]; -}; - -4.21 KVM_SET_SIGNAL_MASK - -Capability: basic -Architectures: x86 -Type: vcpu ioctl -Parameters: struct kvm_signal_mask (in) -Returns: 0 on success, -1 on error - -Defines which signals are blocked during execution of KVM_RUN. This -signal mask temporarily overrides the threads signal mask. Any -unblocked signal received (except SIGKILL and SIGSTOP, which retain -their traditional behaviour) will cause KVM_RUN to return with -EINTR. - -Note the signal will only be delivered if not blocked by the original -signal mask. - -/* for KVM_SET_SIGNAL_MASK */ -struct kvm_signal_mask { - __u32 len; - __u8 sigset[0]; -}; - -4.22 KVM_GET_FPU - -Capability: basic -Architectures: x86 -Type: vcpu ioctl -Parameters: struct kvm_fpu (out) -Returns: 0 on success, -1 on error - -Reads the floating point state from the vcpu. - -/* for KVM_GET_FPU and KVM_SET_FPU */ -struct kvm_fpu { - __u8 fpr[8][16]; - __u16 fcw; - __u16 fsw; - __u8 ftwx; /* in fxsave format */ - __u8 pad1; - __u16 last_opcode; - __u64 last_ip; - __u64 last_dp; - __u8 xmm[16][16]; - __u32 mxcsr; - __u32 pad2; -}; - -4.23 KVM_SET_FPU - -Capability: basic -Architectures: x86 -Type: vcpu ioctl -Parameters: struct kvm_fpu (in) -Returns: 0 on success, -1 on error - -Writes the floating point state to the vcpu. - -/* for KVM_GET_FPU and KVM_SET_FPU */ -struct kvm_fpu { - __u8 fpr[8][16]; - __u16 fcw; - __u16 fsw; - __u8 ftwx; /* in fxsave format */ - __u8 pad1; - __u16 last_opcode; - __u64 last_ip; - __u64 last_dp; - __u8 xmm[16][16]; - __u32 mxcsr; - __u32 pad2; -}; - -4.24 KVM_CREATE_IRQCHIP - -Capability: KVM_CAP_IRQCHIP -Architectures: x86, ia64 -Type: vm ioctl -Parameters: none -Returns: 0 on success, -1 on error - -Creates an interrupt controller model in the kernel. On x86, creates a virtual -ioapic, a virtual PIC (two PICs, nested), and sets up future vcpus to have a -local APIC. IRQ routing for GSIs 0-15 is set to both PIC and IOAPIC; GSI 16-23 -only go to the IOAPIC. On ia64, a IOSAPIC is created. - -4.25 KVM_IRQ_LINE - -Capability: KVM_CAP_IRQCHIP -Architectures: x86, ia64 -Type: vm ioctl -Parameters: struct kvm_irq_level -Returns: 0 on success, -1 on error - -Sets the level of a GSI input to the interrupt controller model in the kernel. -Requires that an interrupt controller model has been previously created with -KVM_CREATE_IRQCHIP. Note that edge-triggered interrupts require the level -to be set to 1 and then back to 0. - -struct kvm_irq_level { - union { - __u32 irq; /* GSI */ - __s32 status; /* not used for KVM_IRQ_LEVEL */ - }; - __u32 level; /* 0 or 1 */ -}; - -4.26 KVM_GET_IRQCHIP - -Capability: KVM_CAP_IRQCHIP -Architectures: x86, ia64 -Type: vm ioctl -Parameters: struct kvm_irqchip (in/out) -Returns: 0 on success, -1 on error - -Reads the state of a kernel interrupt controller created with -KVM_CREATE_IRQCHIP into a buffer provided by the caller. - -struct kvm_irqchip { - __u32 chip_id; /* 0 = PIC1, 1 = PIC2, 2 = IOAPIC */ - __u32 pad; - union { - char dummy[512]; /* reserving space */ - struct kvm_pic_state pic; - struct kvm_ioapic_state ioapic; - } chip; -}; - -4.27 KVM_SET_IRQCHIP - -Capability: KVM_CAP_IRQCHIP -Architectures: x86, ia64 -Type: vm ioctl -Parameters: struct kvm_irqchip (in) -Returns: 0 on success, -1 on error - -Sets the state of a kernel interrupt controller created with -KVM_CREATE_IRQCHIP from a buffer provided by the caller. - -struct kvm_irqchip { - __u32 chip_id; /* 0 = PIC1, 1 = PIC2, 2 = IOAPIC */ - __u32 pad; - union { - char dummy[512]; /* reserving space */ - struct kvm_pic_state pic; - struct kvm_ioapic_state ioapic; - } chip; -}; - -4.28 KVM_XEN_HVM_CONFIG - -Capability: KVM_CAP_XEN_HVM -Architectures: x86 -Type: vm ioctl -Parameters: struct kvm_xen_hvm_config (in) -Returns: 0 on success, -1 on error - -Sets the MSR that the Xen HVM guest uses to initialize its hypercall -page, and provides the starting address and size of the hypercall -blobs in userspace. When the guest writes the MSR, kvm copies one -page of a blob (32- or 64-bit, depending on the vcpu mode) to guest -memory. - -struct kvm_xen_hvm_config { - __u32 flags; - __u32 msr; - __u64 blob_addr_32; - __u64 blob_addr_64; - __u8 blob_size_32; - __u8 blob_size_64; - __u8 pad2[30]; -}; - -4.29 KVM_GET_CLOCK - -Capability: KVM_CAP_ADJUST_CLOCK -Architectures: x86 -Type: vm ioctl -Parameters: struct kvm_clock_data (out) -Returns: 0 on success, -1 on error - -Gets the current timestamp of kvmclock as seen by the current guest. In -conjunction with KVM_SET_CLOCK, it is used to ensure monotonicity on scenarios -such as migration. - -struct kvm_clock_data { - __u64 clock; /* kvmclock current value */ - __u32 flags; - __u32 pad[9]; -}; - -4.30 KVM_SET_CLOCK - -Capability: KVM_CAP_ADJUST_CLOCK -Architectures: x86 -Type: vm ioctl -Parameters: struct kvm_clock_data (in) -Returns: 0 on success, -1 on error - -Sets the current timestamp of kvmclock to the value specified in its parameter. -In conjunction with KVM_GET_CLOCK, it is used to ensure monotonicity on scenarios -such as migration. - -struct kvm_clock_data { - __u64 clock; /* kvmclock current value */ - __u32 flags; - __u32 pad[9]; -}; - -4.31 KVM_GET_VCPU_EVENTS - -Capability: KVM_CAP_VCPU_EVENTS -Extended by: KVM_CAP_INTR_SHADOW -Architectures: x86 -Type: vm ioctl -Parameters: struct kvm_vcpu_event (out) -Returns: 0 on success, -1 on error - -Gets currently pending exceptions, interrupts, and NMIs as well as related -states of the vcpu. - -struct kvm_vcpu_events { - struct { - __u8 injected; - __u8 nr; - __u8 has_error_code; - __u8 pad; - __u32 error_code; - } exception; - struct { - __u8 injected; - __u8 nr; - __u8 soft; - __u8 shadow; - } interrupt; - struct { - __u8 injected; - __u8 pending; - __u8 masked; - __u8 pad; - } nmi; - __u32 sipi_vector; - __u32 flags; -}; - -KVM_VCPUEVENT_VALID_SHADOW may be set in the flags field to signal that -interrupt.shadow contains a valid state. Otherwise, this field is undefined. - -4.32 KVM_SET_VCPU_EVENTS - -Capability: KVM_CAP_VCPU_EVENTS -Extended by: KVM_CAP_INTR_SHADOW -Architectures: x86 -Type: vm ioctl -Parameters: struct kvm_vcpu_event (in) -Returns: 0 on success, -1 on error - -Set pending exceptions, interrupts, and NMIs as well as related states of the -vcpu. - -See KVM_GET_VCPU_EVENTS for the data structure. - -Fields that may be modified asynchronously by running VCPUs can be excluded -from the update. These fields are nmi.pending and sipi_vector. Keep the -corresponding bits in the flags field cleared to suppress overwriting the -current in-kernel state. The bits are: - -KVM_VCPUEVENT_VALID_NMI_PENDING - transfer nmi.pending to the kernel -KVM_VCPUEVENT_VALID_SIPI_VECTOR - transfer sipi_vector - -If KVM_CAP_INTR_SHADOW is available, KVM_VCPUEVENT_VALID_SHADOW can be set in -the flags field to signal that interrupt.shadow contains a valid state and -shall be written into the VCPU. - -4.33 KVM_GET_DEBUGREGS - -Capability: KVM_CAP_DEBUGREGS -Architectures: x86 -Type: vm ioctl -Parameters: struct kvm_debugregs (out) -Returns: 0 on success, -1 on error - -Reads debug registers from the vcpu. - -struct kvm_debugregs { - __u64 db[4]; - __u64 dr6; - __u64 dr7; - __u64 flags; - __u64 reserved[9]; -}; - -4.34 KVM_SET_DEBUGREGS - -Capability: KVM_CAP_DEBUGREGS -Architectures: x86 -Type: vm ioctl -Parameters: struct kvm_debugregs (in) -Returns: 0 on success, -1 on error - -Writes debug registers into the vcpu. - -See KVM_GET_DEBUGREGS for the data structure. The flags field is unused -yet and must be cleared on entry. - -4.35 KVM_SET_USER_MEMORY_REGION - -Capability: KVM_CAP_USER_MEM -Architectures: all -Type: vm ioctl -Parameters: struct kvm_userspace_memory_region (in) -Returns: 0 on success, -1 on error - -struct kvm_userspace_memory_region { - __u32 slot; - __u32 flags; - __u64 guest_phys_addr; - __u64 memory_size; /* bytes */ - __u64 userspace_addr; /* start of the userspace allocated memory */ -}; - -/* for kvm_memory_region::flags */ -#define KVM_MEM_LOG_DIRTY_PAGES 1UL - -This ioctl allows the user to create or modify a guest physical memory -slot. When changing an existing slot, it may be moved in the guest -physical memory space, or its flags may be modified. It may not be -resized. Slots may not overlap in guest physical address space. - -Memory for the region is taken starting at the address denoted by the -field userspace_addr, which must point at user addressable memory for -the entire memory slot size. Any object may back this memory, including -anonymous memory, ordinary files, and hugetlbfs. - -It is recommended that the lower 21 bits of guest_phys_addr and userspace_addr -be identical. This allows large pages in the guest to be backed by large -pages in the host. - -The flags field supports just one flag, KVM_MEM_LOG_DIRTY_PAGES, which -instructs kvm to keep track of writes to memory within the slot. See -the KVM_GET_DIRTY_LOG ioctl. - -When the KVM_CAP_SYNC_MMU capability, changes in the backing of the memory -region are automatically reflected into the guest. For example, an mmap() -that affects the region will be made visible immediately. Another example -is madvise(MADV_DROP). - -It is recommended to use this API instead of the KVM_SET_MEMORY_REGION ioctl. -The KVM_SET_MEMORY_REGION does not allow fine grained control over memory -allocation and is deprecated. - -4.36 KVM_SET_TSS_ADDR - -Capability: KVM_CAP_SET_TSS_ADDR -Architectures: x86 -Type: vm ioctl -Parameters: unsigned long tss_address (in) -Returns: 0 on success, -1 on error - -This ioctl defines the physical address of a three-page region in the guest -physical address space. The region must be within the first 4GB of the -guest physical address space and must not conflict with any memory slot -or any mmio address. The guest may malfunction if it accesses this memory -region. - -This ioctl is required on Intel-based hosts. This is needed on Intel hardware -because of a quirk in the virtualization implementation (see the internals -documentation when it pops into existence). - -4.37 KVM_ENABLE_CAP - -Capability: KVM_CAP_ENABLE_CAP -Architectures: ppc -Type: vcpu ioctl -Parameters: struct kvm_enable_cap (in) -Returns: 0 on success; -1 on error - -+Not all extensions are enabled by default. Using this ioctl the application -can enable an extension, making it available to the guest. - -On systems that do not support this ioctl, it always fails. On systems that -do support it, it only works for extensions that are supported for enablement. - -To check if a capability can be enabled, the KVM_CHECK_EXTENSION ioctl should -be used. - -struct kvm_enable_cap { - /* in */ - __u32 cap; - -The capability that is supposed to get enabled. - - __u32 flags; - -A bitfield indicating future enhancements. Has to be 0 for now. - - __u64 args[4]; - -Arguments for enabling a feature. If a feature needs initial values to -function properly, this is the place to put them. - - __u8 pad[64]; -}; - -4.38 KVM_GET_MP_STATE - -Capability: KVM_CAP_MP_STATE -Architectures: x86, ia64 -Type: vcpu ioctl -Parameters: struct kvm_mp_state (out) -Returns: 0 on success; -1 on error - -struct kvm_mp_state { - __u32 mp_state; -}; - -Returns the vcpu's current "multiprocessing state" (though also valid on -uniprocessor guests). - -Possible values are: - - - KVM_MP_STATE_RUNNABLE: the vcpu is currently running - - KVM_MP_STATE_UNINITIALIZED: the vcpu is an application processor (AP) - which has not yet received an INIT signal - - KVM_MP_STATE_INIT_RECEIVED: the vcpu has received an INIT signal, and is - now ready for a SIPI - - KVM_MP_STATE_HALTED: the vcpu has executed a HLT instruction and - is waiting for an interrupt - - KVM_MP_STATE_SIPI_RECEIVED: the vcpu has just received a SIPI (vector - accessible via KVM_GET_VCPU_EVENTS) - -This ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel -irqchip, the multiprocessing state must be maintained by userspace. - -4.39 KVM_SET_MP_STATE - -Capability: KVM_CAP_MP_STATE -Architectures: x86, ia64 -Type: vcpu ioctl -Parameters: struct kvm_mp_state (in) -Returns: 0 on success; -1 on error - -Sets the vcpu's current "multiprocessing state"; see KVM_GET_MP_STATE for -arguments. - -This ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel -irqchip, the multiprocessing state must be maintained by userspace. - -4.40 KVM_SET_IDENTITY_MAP_ADDR - -Capability: KVM_CAP_SET_IDENTITY_MAP_ADDR -Architectures: x86 -Type: vm ioctl -Parameters: unsigned long identity (in) -Returns: 0 on success, -1 on error - -This ioctl defines the physical address of a one-page region in the guest -physical address space. The region must be within the first 4GB of the -guest physical address space and must not conflict with any memory slot -or any mmio address. The guest may malfunction if it accesses this memory -region. - -This ioctl is required on Intel-based hosts. This is needed on Intel hardware -because of a quirk in the virtualization implementation (see the internals -documentation when it pops into existence). - -4.41 KVM_SET_BOOT_CPU_ID - -Capability: KVM_CAP_SET_BOOT_CPU_ID -Architectures: x86, ia64 -Type: vm ioctl -Parameters: unsigned long vcpu_id -Returns: 0 on success, -1 on error - -Define which vcpu is the Bootstrap Processor (BSP). Values are the same -as the vcpu id in KVM_CREATE_VCPU. If this ioctl is not called, the default -is vcpu 0. - -4.42 KVM_GET_XSAVE - -Capability: KVM_CAP_XSAVE -Architectures: x86 -Type: vcpu ioctl -Parameters: struct kvm_xsave (out) -Returns: 0 on success, -1 on error - -struct kvm_xsave { - __u32 region[1024]; -}; - -This ioctl would copy current vcpu's xsave struct to the userspace. - -4.43 KVM_SET_XSAVE - -Capability: KVM_CAP_XSAVE -Architectures: x86 -Type: vcpu ioctl -Parameters: struct kvm_xsave (in) -Returns: 0 on success, -1 on error - -struct kvm_xsave { - __u32 region[1024]; -}; - -This ioctl would copy userspace's xsave struct to the kernel. - -4.44 KVM_GET_XCRS - -Capability: KVM_CAP_XCRS -Architectures: x86 -Type: vcpu ioctl -Parameters: struct kvm_xcrs (out) -Returns: 0 on success, -1 on error - -struct kvm_xcr { - __u32 xcr; - __u32 reserved; - __u64 value; -}; - -struct kvm_xcrs { - __u32 nr_xcrs; - __u32 flags; - struct kvm_xcr xcrs[KVM_MAX_XCRS]; - __u64 padding[16]; -}; - -This ioctl would copy current vcpu's xcrs to the userspace. - -4.45 KVM_SET_XCRS - -Capability: KVM_CAP_XCRS -Architectures: x86 -Type: vcpu ioctl -Parameters: struct kvm_xcrs (in) -Returns: 0 on success, -1 on error - -struct kvm_xcr { - __u32 xcr; - __u32 reserved; - __u64 value; -}; - -struct kvm_xcrs { - __u32 nr_xcrs; - __u32 flags; - struct kvm_xcr xcrs[KVM_MAX_XCRS]; - __u64 padding[16]; -}; - -This ioctl would set vcpu's xcr to the value userspace specified. - -4.46 KVM_GET_SUPPORTED_CPUID - -Capability: KVM_CAP_EXT_CPUID -Architectures: x86 -Type: system ioctl -Parameters: struct kvm_cpuid2 (in/out) -Returns: 0 on success, -1 on error - -struct kvm_cpuid2 { - __u32 nent; - __u32 padding; - struct kvm_cpuid_entry2 entries[0]; -}; - -#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1 -#define KVM_CPUID_FLAG_STATEFUL_FUNC 2 -#define KVM_CPUID_FLAG_STATE_READ_NEXT 4 - -struct kvm_cpuid_entry2 { - __u32 function; - __u32 index; - __u32 flags; - __u32 eax; - __u32 ebx; - __u32 ecx; - __u32 edx; - __u32 padding[3]; -}; - -This ioctl returns x86 cpuid features which are supported by both the hardware -and kvm. Userspace can use the information returned by this ioctl to -construct cpuid information (for KVM_SET_CPUID2) that is consistent with -hardware, kernel, and userspace capabilities, and with user requirements (for -example, the user may wish to constrain cpuid to emulate older hardware, -or for feature consistency across a cluster). - -Userspace invokes KVM_GET_SUPPORTED_CPUID by passing a kvm_cpuid2 structure -with the 'nent' field indicating the number of entries in the variable-size -array 'entries'. If the number of entries is too low to describe the cpu -capabilities, an error (E2BIG) is returned. If the number is too high, -the 'nent' field is adjusted and an error (ENOMEM) is returned. If the -number is just right, the 'nent' field is adjusted to the number of valid -entries in the 'entries' array, which is then filled. - -The entries returned are the host cpuid as returned by the cpuid instruction, -with unknown or unsupported features masked out. Some features (for example, -x2apic), may not be present in the host cpu, but are exposed by kvm if it can -emulate them efficiently. The fields in each entry are defined as follows: - - function: the eax value used to obtain the entry - index: the ecx value used to obtain the entry (for entries that are - affected by ecx) - flags: an OR of zero or more of the following: - KVM_CPUID_FLAG_SIGNIFCANT_INDEX: - if the index field is valid - KVM_CPUID_FLAG_STATEFUL_FUNC: - if cpuid for this function returns different values for successive - invocations; there will be several entries with the same function, - all with this flag set - KVM_CPUID_FLAG_STATE_READ_NEXT: - for KVM_CPUID_FLAG_STATEFUL_FUNC entries, set if this entry is - the first entry to be read by a cpu - eax, ebx, ecx, edx: the values returned by the cpuid instruction for - this function/index combination - -4.47 KVM_PPC_GET_PVINFO - -Capability: KVM_CAP_PPC_GET_PVINFO -Architectures: ppc -Type: vm ioctl -Parameters: struct kvm_ppc_pvinfo (out) -Returns: 0 on success, !0 on error - -struct kvm_ppc_pvinfo { - __u32 flags; - __u32 hcall[4]; - __u8 pad[108]; -}; - -This ioctl fetches PV specific information that need to be passed to the guest -using the device tree or other means from vm context. - -For now the only implemented piece of information distributed here is an array -of 4 instructions that make up a hypercall. - -If any additional field gets added to this structure later on, a bit for that -additional piece of information will be set in the flags bitmap. - -4.48 KVM_ASSIGN_PCI_DEVICE - -Capability: KVM_CAP_DEVICE_ASSIGNMENT -Architectures: x86 ia64 -Type: vm ioctl -Parameters: struct kvm_assigned_pci_dev (in) -Returns: 0 on success, -1 on error - -Assigns a host PCI device to the VM. - -struct kvm_assigned_pci_dev { - __u32 assigned_dev_id; - __u32 busnr; - __u32 devfn; - __u32 flags; - __u32 segnr; - union { - __u32 reserved[11]; - }; -}; - -The PCI device is specified by the triple segnr, busnr, and devfn. -Identification in succeeding service requests is done via assigned_dev_id. The -following flags are specified: - -/* Depends on KVM_CAP_IOMMU */ -#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) - -4.49 KVM_DEASSIGN_PCI_DEVICE - -Capability: KVM_CAP_DEVICE_DEASSIGNMENT -Architectures: x86 ia64 -Type: vm ioctl -Parameters: struct kvm_assigned_pci_dev (in) -Returns: 0 on success, -1 on error - -Ends PCI device assignment, releasing all associated resources. - -See KVM_CAP_DEVICE_ASSIGNMENT for the data structure. Only assigned_dev_id is -used in kvm_assigned_pci_dev to identify the device. - -4.50 KVM_ASSIGN_DEV_IRQ - -Capability: KVM_CAP_ASSIGN_DEV_IRQ -Architectures: x86 ia64 -Type: vm ioctl -Parameters: struct kvm_assigned_irq (in) -Returns: 0 on success, -1 on error - -Assigns an IRQ to a passed-through device. - -struct kvm_assigned_irq { - __u32 assigned_dev_id; - __u32 host_irq; - __u32 guest_irq; - __u32 flags; - union { - struct { - __u32 addr_lo; - __u32 addr_hi; - __u32 data; - } guest_msi; - __u32 reserved[12]; - }; -}; - -The following flags are defined: - -#define KVM_DEV_IRQ_HOST_INTX (1 << 0) -#define KVM_DEV_IRQ_HOST_MSI (1 << 1) -#define KVM_DEV_IRQ_HOST_MSIX (1 << 2) - -#define KVM_DEV_IRQ_GUEST_INTX (1 << 8) -#define KVM_DEV_IRQ_GUEST_MSI (1 << 9) -#define KVM_DEV_IRQ_GUEST_MSIX (1 << 10) - -It is not valid to specify multiple types per host or guest IRQ. However, the -IRQ type of host and guest can differ or can even be null. - -4.51 KVM_DEASSIGN_DEV_IRQ - -Capability: KVM_CAP_ASSIGN_DEV_IRQ -Architectures: x86 ia64 -Type: vm ioctl -Parameters: struct kvm_assigned_irq (in) -Returns: 0 on success, -1 on error - -Ends an IRQ assignment to a passed-through device. - -See KVM_ASSIGN_DEV_IRQ for the data structure. The target device is specified -by assigned_dev_id, flags must correspond to the IRQ type specified on -KVM_ASSIGN_DEV_IRQ. Partial deassignment of host or guest IRQ is allowed. - -4.52 KVM_SET_GSI_ROUTING - -Capability: KVM_CAP_IRQ_ROUTING -Architectures: x86 ia64 -Type: vm ioctl -Parameters: struct kvm_irq_routing (in) -Returns: 0 on success, -1 on error - -Sets the GSI routing table entries, overwriting any previously set entries. - -struct kvm_irq_routing { - __u32 nr; - __u32 flags; - struct kvm_irq_routing_entry entries[0]; -}; - -No flags are specified so far, the corresponding field must be set to zero. - -struct kvm_irq_routing_entry { - __u32 gsi; - __u32 type; - __u32 flags; - __u32 pad; - union { - struct kvm_irq_routing_irqchip irqchip; - struct kvm_irq_routing_msi msi; - __u32 pad[8]; - } u; -}; - -/* gsi routing entry types */ -#define KVM_IRQ_ROUTING_IRQCHIP 1 -#define KVM_IRQ_ROUTING_MSI 2 - -No flags are specified so far, the corresponding field must be set to zero. - -struct kvm_irq_routing_irqchip { - __u32 irqchip; - __u32 pin; -}; - -struct kvm_irq_routing_msi { - __u32 address_lo; - __u32 address_hi; - __u32 data; - __u32 pad; -}; - -4.53 KVM_ASSIGN_SET_MSIX_NR - -Capability: KVM_CAP_DEVICE_MSIX -Architectures: x86 ia64 -Type: vm ioctl -Parameters: struct kvm_assigned_msix_nr (in) -Returns: 0 on success, -1 on error - -Set the number of MSI-X interrupts for an assigned device. This service can -only be called once in the lifetime of an assigned device. - -struct kvm_assigned_msix_nr { - __u32 assigned_dev_id; - __u16 entry_nr; - __u16 padding; -}; - -#define KVM_MAX_MSIX_PER_DEV 256 - -4.54 KVM_ASSIGN_SET_MSIX_ENTRY - -Capability: KVM_CAP_DEVICE_MSIX -Architectures: x86 ia64 -Type: vm ioctl -Parameters: struct kvm_assigned_msix_entry (in) -Returns: 0 on success, -1 on error - -Specifies the routing of an MSI-X assigned device interrupt to a GSI. Setting -the GSI vector to zero means disabling the interrupt. - -struct kvm_assigned_msix_entry { - __u32 assigned_dev_id; - __u32 gsi; - __u16 entry; /* The index of entry in the MSI-X table */ - __u16 padding[3]; -}; - -5. The kvm_run structure - -Application code obtains a pointer to the kvm_run structure by -mmap()ing a vcpu fd. From that point, application code can control -execution by changing fields in kvm_run prior to calling the KVM_RUN -ioctl, and obtain information about the reason KVM_RUN returned by -looking up structure members. - -struct kvm_run { - /* in */ - __u8 request_interrupt_window; - -Request that KVM_RUN return when it becomes possible to inject external -interrupts into the guest. Useful in conjunction with KVM_INTERRUPT. - - __u8 padding1[7]; - - /* out */ - __u32 exit_reason; - -When KVM_RUN has returned successfully (return value 0), this informs -application code why KVM_RUN has returned. Allowable values for this -field are detailed below. - - __u8 ready_for_interrupt_injection; - -If request_interrupt_window has been specified, this field indicates -an interrupt can be injected now with KVM_INTERRUPT. - - __u8 if_flag; - -The value of the current interrupt flag. Only valid if in-kernel -local APIC is not used. - - __u8 padding2[2]; - - /* in (pre_kvm_run), out (post_kvm_run) */ - __u64 cr8; - -The value of the cr8 register. Only valid if in-kernel local APIC is -not used. Both input and output. - - __u64 apic_base; - -The value of the APIC BASE msr. Only valid if in-kernel local -APIC is not used. Both input and output. - - union { - /* KVM_EXIT_UNKNOWN */ - struct { - __u64 hardware_exit_reason; - } hw; - -If exit_reason is KVM_EXIT_UNKNOWN, the vcpu has exited due to unknown -reasons. Further architecture-specific information is available in -hardware_exit_reason. - - /* KVM_EXIT_FAIL_ENTRY */ - struct { - __u64 hardware_entry_failure_reason; - } fail_entry; - -If exit_reason is KVM_EXIT_FAIL_ENTRY, the vcpu could not be run due -to unknown reasons. Further architecture-specific information is -available in hardware_entry_failure_reason. - - /* KVM_EXIT_EXCEPTION */ - struct { - __u32 exception; - __u32 error_code; - } ex; - -Unused. - - /* KVM_EXIT_IO */ - struct { -#define KVM_EXIT_IO_IN 0 -#define KVM_EXIT_IO_OUT 1 - __u8 direction; - __u8 size; /* bytes */ - __u16 port; - __u32 count; - __u64 data_offset; /* relative to kvm_run start */ - } io; - -If exit_reason is KVM_EXIT_IO, then the vcpu has -executed a port I/O instruction which could not be satisfied by kvm. -data_offset describes where the data is located (KVM_EXIT_IO_OUT) or -where kvm expects application code to place the data for the next -KVM_RUN invocation (KVM_EXIT_IO_IN). Data format is a packed array. - - struct { - struct kvm_debug_exit_arch arch; - } debug; - -Unused. - - /* KVM_EXIT_MMIO */ - struct { - __u64 phys_addr; - __u8 data[8]; - __u32 len; - __u8 is_write; - } mmio; - -If exit_reason is KVM_EXIT_MMIO, then the vcpu has -executed a memory-mapped I/O instruction which could not be satisfied -by kvm. The 'data' member contains the written data if 'is_write' is -true, and should be filled by application code otherwise. - -NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO and KVM_EXIT_OSI, the corresponding -operations are complete (and guest state is consistent) only after userspace -has re-entered the kernel with KVM_RUN. The kernel side will first finish -incomplete operations and then check for pending signals. Userspace -can re-enter the guest with an unmasked signal pending to complete -pending operations. - - /* KVM_EXIT_HYPERCALL */ - struct { - __u64 nr; - __u64 args[6]; - __u64 ret; - __u32 longmode; - __u32 pad; - } hypercall; - -Unused. This was once used for 'hypercall to userspace'. To implement -such functionality, use KVM_EXIT_IO (x86) or KVM_EXIT_MMIO (all except s390). -Note KVM_EXIT_IO is significantly faster than KVM_EXIT_MMIO. - - /* KVM_EXIT_TPR_ACCESS */ - struct { - __u64 rip; - __u32 is_write; - __u32 pad; - } tpr_access; - -To be documented (KVM_TPR_ACCESS_REPORTING). - - /* KVM_EXIT_S390_SIEIC */ - struct { - __u8 icptcode; - __u64 mask; /* psw upper half */ - __u64 addr; /* psw lower half */ - __u16 ipa; - __u32 ipb; - } s390_sieic; - -s390 specific. - - /* KVM_EXIT_S390_RESET */ -#define KVM_S390_RESET_POR 1 -#define KVM_S390_RESET_CLEAR 2 -#define KVM_S390_RESET_SUBSYSTEM 4 -#define KVM_S390_RESET_CPU_INIT 8 -#define KVM_S390_RESET_IPL 16 - __u64 s390_reset_flags; - -s390 specific. - - /* KVM_EXIT_DCR */ - struct { - __u32 dcrn; - __u32 data; - __u8 is_write; - } dcr; - -powerpc specific. - - /* KVM_EXIT_OSI */ - struct { - __u64 gprs[32]; - } osi; - -MOL uses a special hypercall interface it calls 'OSI'. To enable it, we catch -hypercalls and exit with this exit struct that contains all the guest gprs. - -If exit_reason is KVM_EXIT_OSI, then the vcpu has triggered such a hypercall. -Userspace can now handle the hypercall and when it's done modify the gprs as -necessary. Upon guest entry all guest GPRs will then be replaced by the values -in this struct. - - /* Fix the size of the union. */ - char padding[256]; - }; -}; diff --git a/Documentation/kvm/review-checklist.txt b/Documentation/kvm/review-checklist.txt deleted file mode 100644 index 730475ae1b8d..000000000000 --- a/Documentation/kvm/review-checklist.txt +++ /dev/null @@ -1,38 +0,0 @@ -Review checklist for kvm patches -================================ - -1. The patch must follow Documentation/CodingStyle and - Documentation/SubmittingPatches. - -2. Patches should be against kvm.git master branch. - -3. If the patch introduces or modifies a new userspace API: - - the API must be documented in Documentation/kvm/api.txt - - the API must be discoverable using KVM_CHECK_EXTENSION - -4. New state must include support for save/restore. - -5. New features must default to off (userspace should explicitly request them). - Performance improvements can and should default to on. - -6. New cpu features should be exposed via KVM_GET_SUPPORTED_CPUID2 - -7. Emulator changes should be accompanied by unit tests for qemu-kvm.git - kvm/test directory. - -8. Changes should be vendor neutral when possible. Changes to common code - are better than duplicating changes to vendor code. - -9. Similarly, prefer changes to arch independent code than to arch dependent - code. - -10. User/kernel interfaces and guest/host interfaces must be 64-bit clean - (all variables and sizes naturally aligned on 64-bit; use specific types - only - u64 rather than ulong). - -11. New guest visible features must either be documented in a hardware manual - or be accompanied by documentation. - -12. Features must be robust against reset and kexec - for example, shared - host/guest memory must be unshared to prevent the host from writing to - guest memory that the guest has not reserved for this purpose. diff --git a/Documentation/laptops/acer-wmi.txt b/Documentation/laptops/acer-wmi.txt deleted file mode 100644 index 4beafa663dd6..000000000000 --- a/Documentation/laptops/acer-wmi.txt +++ /dev/null @@ -1,184 +0,0 @@ -Acer Laptop WMI Extras Driver -http://code.google.com/p/aceracpi -Version 0.3 -4th April 2009 - -Copyright 2007-2009 Carlos Corbacho - -acer-wmi is a driver to allow you to control various parts of your Acer laptop -hardware under Linux which are exposed via ACPI-WMI. - -This driver completely replaces the old out-of-tree acer_acpi, which I am -currently maintaining for bug fixes only on pre-2.6.25 kernels. All development -work is now focused solely on acer-wmi. - -Disclaimer -********** - -Acer and Wistron have provided nothing towards the development acer_acpi or -acer-wmi. All information we have has been through the efforts of the developers -and the users to discover as much as possible about the hardware. - -As such, I do warn that this could break your hardware - this is extremely -unlikely of course, but please bear this in mind. - -Background -********** - -acer-wmi is derived from acer_acpi, originally developed by Mark -Smith in 2005, then taken over by Carlos Corbacho in 2007, in order to activate -the wireless LAN card under a 64-bit version of Linux, as acerhk[1] (the -previous solution to the problem) relied on making 32 bit BIOS calls which are -not possible in kernel space from a 64 bit OS. - -[1] acerhk: http://www.cakey.de/acerhk/ - -Supported Hardware -****************** - -NOTE: The Acer Aspire One is not supported hardware. It cannot work with -acer-wmi until Acer fix their ACPI-WMI implementation on them, so has been -blacklisted until that happens. - -Please see the website for the current list of known working hardware: - -http://code.google.com/p/aceracpi/wiki/SupportedHardware - -If your laptop is not listed, or listed as unknown, and works with acer-wmi, -please contact me with a copy of the DSDT. - -If your Acer laptop doesn't work with acer-wmi, I would also like to see the -DSDT. - -To send me the DSDT, as root/sudo: - -cat /sys/firmware/acpi/tables/DSDT > dsdt - -And send me the resulting 'dsdt' file. - -Usage -***** - -On Acer laptops, acer-wmi should already be autoloaded based on DMI matching. -For non-Acer laptops, until WMI based autoloading support is added, you will -need to manually load acer-wmi. - -acer-wmi creates /sys/devices/platform/acer-wmi, and fills it with various -files whose usage is detailed below, which enables you to control some of the -following (varies between models): - -* the wireless LAN card radio -* inbuilt Bluetooth adapter -* inbuilt 3G card -* mail LED of your laptop -* brightness of the LCD panel - -Wireless -******** - -With regards to wireless, all acer-wmi does is enable the radio on the card. It -is not responsible for the wireless LED - once the radio is enabled, this is -down to the wireless driver for your card. So the behaviour of the wireless LED, -once you enable the radio, will depend on your hardware and driver combination. - -e.g. With the BCM4318 on the Acer Aspire 5020 series: - -ndiswrapper: Light blinks on when transmitting -b43: Solid light, blinks off when transmitting - -Wireless radio control is unconditionally enabled - all Acer laptops that support -acer-wmi come with built-in wireless. However, should you feel so inclined to -ever wish to remove the card, or swap it out at some point, please get in touch -with me, as we may well be able to gain some data on wireless card detection. - -The wireless radio is exposed through rfkill. - -Bluetooth -********* - -For bluetooth, this is an internal USB dongle, so once enabled, you will get -a USB device connection event, and a new USB device appears. When you disable -bluetooth, you get the reverse - a USB device disconnect event, followed by the -device disappearing again. - -Bluetooth is autodetected by acer-wmi, so if you do not have a bluetooth module -installed in your laptop, this file won't exist (please be aware that it is -quite common for Acer not to fit bluetooth to their laptops - so just because -you have a bluetooth button on the laptop, doesn't mean that bluetooth is -installed). - -For the adventurously minded - if you want to buy an internal bluetooth -module off the internet that is compatible with your laptop and fit it, then -it will work just fine with acer-wmi. - -Bluetooth is exposed through rfkill. - -3G -** - -3G is currently not autodetected, so the 'threeg' file is always created under -sysfs. So far, no-one in possession of an Acer laptop with 3G built-in appears to -have tried Linux, or reported back, so we don't have any information on this. - -If you have an Acer laptop that does have a 3G card in, please contact me so we -can properly detect these, and find out a bit more about them. - -To read the status of the 3G card (0=off, 1=on): -cat /sys/devices/platform/acer-wmi/threeg - -To enable the 3G card: -echo 1 > /sys/devices/platform/acer-wmi/threeg - -To disable the 3G card: -echo 0 > /sys/devices/platform/acer-wmi/threeg - -To set the state of the 3G card when loading acer-wmi, pass: -threeg=X (where X is 0 or 1) - -Mail LED -******** - -This can be found in most older Acer laptops supported by acer-wmi, and many -newer ones - it is built into the 'mail' button, and blinks when active. - -On newer (WMID) laptops though, we have no way of detecting the mail LED. If -your laptop identifies itself in dmesg as a WMID model, then please try loading -acer_acpi with: - -force_series=2490 - -This will use a known alternative method of reading/ writing the mail LED. If -it works, please report back to me with the DMI data from your laptop so this -can be added to acer-wmi. - -The LED is exposed through the LED subsystem, and can be found in: - -/sys/devices/platform/acer-wmi/leds/acer-wmi::mail/ - -The mail LED is autodetected, so if you don't have one, the LED device won't -be registered. - -Backlight -********* - -The backlight brightness control is available on all acer-wmi supported -hardware. The maximum brightness level is usually 15, but on some newer laptops -it's 10 (this is again autodetected). - -The backlight is exposed through the backlight subsystem, and can be found in: - -/sys/devices/platform/acer-wmi/backlight/acer-wmi/ - -Credits -******* - -Olaf Tauber, who did the real hard work when he developed acerhk -http://www.cakey.de/acerhk/ -All the authors of laptop ACPI modules in the kernel, whose work -was an inspiration in the early days of acer_acpi -Mathieu Segaud, who solved the problem with having to modprobe the driver -twice in acer_acpi 0.2. -Jim Ramsay, who added support for the WMID interface -Mark Smith, who started the original acer_acpi - -And the many people who have used both acer_acpi and acer-wmi. diff --git a/Documentation/lguest/Makefile b/Documentation/lguest/Makefile deleted file mode 100644 index bebac6b4f332..000000000000 --- a/Documentation/lguest/Makefile +++ /dev/null @@ -1,8 +0,0 @@ -# This creates the demonstration utility "lguest" which runs a Linux guest. -# Missing headers? Add "-I../../include -I../../arch/x86/include" -CFLAGS:=-m32 -Wall -Wmissing-declarations -Wmissing-prototypes -O3 -U_FORTIFY_SOURCE - -all: lguest - -clean: - rm -f lguest diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c deleted file mode 100644 index d9da7e148538..000000000000 --- a/Documentation/lguest/lguest.c +++ /dev/null @@ -1,2095 +0,0 @@ -/*P:100 - * This is the Launcher code, a simple program which lays out the "physical" - * memory for the new Guest by mapping the kernel image and the virtual - * devices, then opens /dev/lguest to tell the kernel about the Guest and - * control it. -:*/ -#define _LARGEFILE64_SOURCE -#define _GNU_SOURCE -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include "../../include/linux/lguest_launcher.h" -/*L:110 - * We can ignore the 42 include files we need for this program, but I do want - * to draw attention to the use of kernel-style types. - * - * As Linus said, "C is a Spartan language, and so should your naming be." I - * like these abbreviations, so we define them here. Note that u64 is always - * unsigned long long, which works on all Linux systems: this means that we can - * use %llu in printf for any u64. - */ -typedef unsigned long long u64; -typedef uint32_t u32; -typedef uint16_t u16; -typedef uint8_t u8; -/*:*/ - -#define PAGE_PRESENT 0x7 /* Present, RW, Execute */ -#define BRIDGE_PFX "bridge:" -#ifndef SIOCBRADDIF -#define SIOCBRADDIF 0x89a2 /* add interface to bridge */ -#endif -/* We can have up to 256 pages for devices. */ -#define DEVICE_PAGES 256 -/* This will occupy 3 pages: it must be a power of 2. */ -#define VIRTQUEUE_NUM 256 - -/*L:120 - * verbose is both a global flag and a macro. The C preprocessor allows - * this, and although I wouldn't recommend it, it works quite nicely here. - */ -static bool verbose; -#define verbose(args...) \ - do { if (verbose) printf(args); } while(0) -/*:*/ - -/* The pointer to the start of guest memory. */ -static void *guest_base; -/* The maximum guest physical address allowed, and maximum possible. */ -static unsigned long guest_limit, guest_max; -/* The /dev/lguest file descriptor. */ -static int lguest_fd; - -/* a per-cpu variable indicating whose vcpu is currently running */ -static unsigned int __thread cpu_id; - -/* This is our list of devices. */ -struct device_list { - /* Counter to assign interrupt numbers. */ - unsigned int next_irq; - - /* Counter to print out convenient device numbers. */ - unsigned int device_num; - - /* The descriptor page for the devices. */ - u8 *descpage; - - /* A single linked list of devices. */ - struct device *dev; - /* And a pointer to the last device for easy append. */ - struct device *lastdev; -}; - -/* The list of Guest devices, based on command line arguments. */ -static struct device_list devices; - -/* The device structure describes a single device. */ -struct device { - /* The linked-list pointer. */ - struct device *next; - - /* The device's descriptor, as mapped into the Guest. */ - struct lguest_device_desc *desc; - - /* We can't trust desc values once Guest has booted: we use these. */ - unsigned int feature_len; - unsigned int num_vq; - - /* The name of this device, for --verbose. */ - const char *name; - - /* Any queues attached to this device */ - struct virtqueue *vq; - - /* Is it operational */ - bool running; - - /* Does Guest want an intrrupt on empty? */ - bool irq_on_empty; - - /* Device-specific data. */ - void *priv; -}; - -/* The virtqueue structure describes a queue attached to a device. */ -struct virtqueue { - struct virtqueue *next; - - /* Which device owns me. */ - struct device *dev; - - /* The configuration for this queue. */ - struct lguest_vqconfig config; - - /* The actual ring of buffers. */ - struct vring vring; - - /* Last available index we saw. */ - u16 last_avail_idx; - - /* How many are used since we sent last irq? */ - unsigned int pending_used; - - /* Eventfd where Guest notifications arrive. */ - int eventfd; - - /* Function for the thread which is servicing this virtqueue. */ - void (*service)(struct virtqueue *vq); - pid_t thread; -}; - -/* Remember the arguments to the program so we can "reboot" */ -static char **main_args; - -/* The original tty settings to restore on exit. */ -static struct termios orig_term; - -/* - * We have to be careful with barriers: our devices are all run in separate - * threads and so we need to make sure that changes visible to the Guest happen - * in precise order. - */ -#define wmb() __asm__ __volatile__("" : : : "memory") -#define mb() __asm__ __volatile__("" : : : "memory") - -/* - * Convert an iovec element to the given type. - * - * This is a fairly ugly trick: we need to know the size of the type and - * alignment requirement to check the pointer is kosher. It's also nice to - * have the name of the type in case we report failure. - * - * Typing those three things all the time is cumbersome and error prone, so we - * have a macro which sets them all up and passes to the real function. - */ -#define convert(iov, type) \ - ((type *)_convert((iov), sizeof(type), __alignof__(type), #type)) - -static void *_convert(struct iovec *iov, size_t size, size_t align, - const char *name) -{ - if (iov->iov_len != size) - errx(1, "Bad iovec size %zu for %s", iov->iov_len, name); - if ((unsigned long)iov->iov_base % align != 0) - errx(1, "Bad alignment %p for %s", iov->iov_base, name); - return iov->iov_base; -} - -/* Wrapper for the last available index. Makes it easier to change. */ -#define lg_last_avail(vq) ((vq)->last_avail_idx) - -/* - * The virtio configuration space is defined to be little-endian. x86 is - * little-endian too, but it's nice to be explicit so we have these helpers. - */ -#define cpu_to_le16(v16) (v16) -#define cpu_to_le32(v32) (v32) -#define cpu_to_le64(v64) (v64) -#define le16_to_cpu(v16) (v16) -#define le32_to_cpu(v32) (v32) -#define le64_to_cpu(v64) (v64) - -/* Is this iovec empty? */ -static bool iov_empty(const struct iovec iov[], unsigned int num_iov) -{ - unsigned int i; - - for (i = 0; i < num_iov; i++) - if (iov[i].iov_len) - return false; - return true; -} - -/* Take len bytes from the front of this iovec. */ -static void iov_consume(struct iovec iov[], unsigned num_iov, unsigned len) -{ - unsigned int i; - - for (i = 0; i < num_iov; i++) { - unsigned int used; - - used = iov[i].iov_len < len ? iov[i].iov_len : len; - iov[i].iov_base += used; - iov[i].iov_len -= used; - len -= used; - } - assert(len == 0); -} - -/* The device virtqueue descriptors are followed by feature bitmasks. */ -static u8 *get_feature_bits(struct device *dev) -{ - return (u8 *)(dev->desc + 1) - + dev->num_vq * sizeof(struct lguest_vqconfig); -} - -/*L:100 - * The Launcher code itself takes us out into userspace, that scary place where - * pointers run wild and free! Unfortunately, like most userspace programs, - * it's quite boring (which is why everyone likes to hack on the kernel!). - * Perhaps if you make up an Lguest Drinking Game at this point, it will get - * you through this section. Or, maybe not. - * - * The Launcher sets up a big chunk of memory to be the Guest's "physical" - * memory and stores it in "guest_base". In other words, Guest physical == - * Launcher virtual with an offset. - * - * This can be tough to get your head around, but usually it just means that we - * use these trivial conversion functions when the Guest gives us its - * "physical" addresses: - */ -static void *from_guest_phys(unsigned long addr) -{ - return guest_base + addr; -} - -static unsigned long to_guest_phys(const void *addr) -{ - return (addr - guest_base); -} - -/*L:130 - * Loading the Kernel. - * - * We start with couple of simple helper routines. open_or_die() avoids - * error-checking code cluttering the callers: - */ -static int open_or_die(const char *name, int flags) -{ - int fd = open(name, flags); - if (fd < 0) - err(1, "Failed to open %s", name); - return fd; -} - -/* map_zeroed_pages() takes a number of pages. */ -static void *map_zeroed_pages(unsigned int num) -{ - int fd = open_or_die("/dev/zero", O_RDONLY); - void *addr; - - /* - * We use a private mapping (ie. if we write to the page, it will be - * copied). We allocate an extra two pages PROT_NONE to act as guard - * pages against read/write attempts that exceed allocated space. - */ - addr = mmap(NULL, getpagesize() * (num+2), - PROT_NONE, MAP_PRIVATE, fd, 0); - - if (addr == MAP_FAILED) - err(1, "Mmapping %u pages of /dev/zero", num); - - if (mprotect(addr + getpagesize(), getpagesize() * num, - PROT_READ|PROT_WRITE) == -1) - err(1, "mprotect rw %u pages failed", num); - - /* - * One neat mmap feature is that you can close the fd, and it - * stays mapped. - */ - close(fd); - - /* Return address after PROT_NONE page */ - return addr + getpagesize(); -} - -/* Get some more pages for a device. */ -static void *get_pages(unsigned int num) -{ - void *addr = from_guest_phys(guest_limit); - - guest_limit += num * getpagesize(); - if (guest_limit > guest_max) - errx(1, "Not enough memory for devices"); - return addr; -} - -/* - * This routine is used to load the kernel or initrd. It tries mmap, but if - * that fails (Plan 9's kernel file isn't nicely aligned on page boundaries), - * it falls back to reading the memory in. - */ -static void map_at(int fd, void *addr, unsigned long offset, unsigned long len) -{ - ssize_t r; - - /* - * We map writable even though for some segments are marked read-only. - * The kernel really wants to be writable: it patches its own - * instructions. - * - * MAP_PRIVATE means that the page won't be copied until a write is - * done to it. This allows us to share untouched memory between - * Guests. - */ - if (mmap(addr, len, PROT_READ|PROT_WRITE, - MAP_FIXED|MAP_PRIVATE, fd, offset) != MAP_FAILED) - return; - - /* pread does a seek and a read in one shot: saves a few lines. */ - r = pread(fd, addr, len, offset); - if (r != len) - err(1, "Reading offset %lu len %lu gave %zi", offset, len, r); -} - -/* - * This routine takes an open vmlinux image, which is in ELF, and maps it into - * the Guest memory. ELF = Embedded Linking Format, which is the format used - * by all modern binaries on Linux including the kernel. - * - * The ELF headers give *two* addresses: a physical address, and a virtual - * address. We use the physical address; the Guest will map itself to the - * virtual address. - * - * We return the starting address. - */ -static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr) -{ - Elf32_Phdr phdr[ehdr->e_phnum]; - unsigned int i; - - /* - * Sanity checks on the main ELF header: an x86 executable with a - * reasonable number of correctly-sized program headers. - */ - if (ehdr->e_type != ET_EXEC - || ehdr->e_machine != EM_386 - || ehdr->e_phentsize != sizeof(Elf32_Phdr) - || ehdr->e_phnum < 1 || ehdr->e_phnum > 65536U/sizeof(Elf32_Phdr)) - errx(1, "Malformed elf header"); - - /* - * An ELF executable contains an ELF header and a number of "program" - * headers which indicate which parts ("segments") of the program to - * load where. - */ - - /* We read in all the program headers at once: */ - if (lseek(elf_fd, ehdr->e_phoff, SEEK_SET) < 0) - err(1, "Seeking to program headers"); - if (read(elf_fd, phdr, sizeof(phdr)) != sizeof(phdr)) - err(1, "Reading program headers"); - - /* - * Try all the headers: there are usually only three. A read-only one, - * a read-write one, and a "note" section which we don't load. - */ - for (i = 0; i < ehdr->e_phnum; i++) { - /* If this isn't a loadable segment, we ignore it */ - if (phdr[i].p_type != PT_LOAD) - continue; - - verbose("Section %i: size %i addr %p\n", - i, phdr[i].p_memsz, (void *)phdr[i].p_paddr); - - /* We map this section of the file at its physical address. */ - map_at(elf_fd, from_guest_phys(phdr[i].p_paddr), - phdr[i].p_offset, phdr[i].p_filesz); - } - - /* The entry point is given in the ELF header. */ - return ehdr->e_entry; -} - -/*L:150 - * A bzImage, unlike an ELF file, is not meant to be loaded. You're supposed - * to jump into it and it will unpack itself. We used to have to perform some - * hairy magic because the unpacking code scared me. - * - * Fortunately, Jeremy Fitzhardinge convinced me it wasn't that hard and wrote - * a small patch to jump over the tricky bits in the Guest, so now we just read - * the funky header so we know where in the file to load, and away we go! - */ -static unsigned long load_bzimage(int fd) -{ - struct boot_params boot; - int r; - /* Modern bzImages get loaded at 1M. */ - void *p = from_guest_phys(0x100000); - - /* - * Go back to the start of the file and read the header. It should be - * a Linux boot header (see Documentation/x86/i386/boot.txt) - */ - lseek(fd, 0, SEEK_SET); - read(fd, &boot, sizeof(boot)); - - /* Inside the setup_hdr, we expect the magic "HdrS" */ - if (memcmp(&boot.hdr.header, "HdrS", 4) != 0) - errx(1, "This doesn't look like a bzImage to me"); - - /* Skip over the extra sectors of the header. */ - lseek(fd, (boot.hdr.setup_sects+1) * 512, SEEK_SET); - - /* Now read everything into memory. in nice big chunks. */ - while ((r = read(fd, p, 65536)) > 0) - p += r; - - /* Finally, code32_start tells us where to enter the kernel. */ - return boot.hdr.code32_start; -} - -/*L:140 - * Loading the kernel is easy when it's a "vmlinux", but most kernels - * come wrapped up in the self-decompressing "bzImage" format. With a little - * work, we can load those, too. - */ -static unsigned long load_kernel(int fd) -{ - Elf32_Ehdr hdr; - - /* Read in the first few bytes. */ - if (read(fd, &hdr, sizeof(hdr)) != sizeof(hdr)) - err(1, "Reading kernel"); - - /* If it's an ELF file, it starts with "\177ELF" */ - if (memcmp(hdr.e_ident, ELFMAG, SELFMAG) == 0) - return map_elf(fd, &hdr); - - /* Otherwise we assume it's a bzImage, and try to load it. */ - return load_bzimage(fd); -} - -/* - * This is a trivial little helper to align pages. Andi Kleen hated it because - * it calls getpagesize() twice: "it's dumb code." - * - * Kernel guys get really het up about optimization, even when it's not - * necessary. I leave this code as a reaction against that. - */ -static inline unsigned long page_align(unsigned long addr) -{ - /* Add upwards and truncate downwards. */ - return ((addr + getpagesize()-1) & ~(getpagesize()-1)); -} - -/*L:180 - * An "initial ram disk" is a disk image loaded into memory along with the - * kernel which the kernel can use to boot from without needing any drivers. - * Most distributions now use this as standard: the initrd contains the code to - * load the appropriate driver modules for the current machine. - * - * Importantly, James Morris works for RedHat, and Fedora uses initrds for its - * kernels. He sent me this (and tells me when I break it). - */ -static unsigned long load_initrd(const char *name, unsigned long mem) -{ - int ifd; - struct stat st; - unsigned long len; - - ifd = open_or_die(name, O_RDONLY); - /* fstat() is needed to get the file size. */ - if (fstat(ifd, &st) < 0) - err(1, "fstat() on initrd '%s'", name); - - /* - * We map the initrd at the top of memory, but mmap wants it to be - * page-aligned, so we round the size up for that. - */ - len = page_align(st.st_size); - map_at(ifd, from_guest_phys(mem - len), 0, st.st_size); - /* - * Once a file is mapped, you can close the file descriptor. It's a - * little odd, but quite useful. - */ - close(ifd); - verbose("mapped initrd %s size=%lu @ %p\n", name, len, (void*)mem-len); - - /* We return the initrd size. */ - return len; -} -/*:*/ - -/* - * Simple routine to roll all the commandline arguments together with spaces - * between them. - */ -static void concat(char *dst, char *args[]) -{ - unsigned int i, len = 0; - - for (i = 0; args[i]; i++) { - if (i) { - strcat(dst+len, " "); - len++; - } - strcpy(dst+len, args[i]); - len += strlen(args[i]); - } - /* In case it's empty. */ - dst[len] = '\0'; -} - -/*L:185 - * This is where we actually tell the kernel to initialize the Guest. We - * saw the arguments it expects when we looked at initialize() in lguest_user.c: - * the base of Guest "physical" memory, the top physical page to allow and the - * entry point for the Guest. - */ -static void tell_kernel(unsigned long start) -{ - unsigned long args[] = { LHREQ_INITIALIZE, - (unsigned long)guest_base, - guest_limit / getpagesize(), start }; - verbose("Guest: %p - %p (%#lx)\n", - guest_base, guest_base + guest_limit, guest_limit); - lguest_fd = open_or_die("/dev/lguest", O_RDWR); - if (write(lguest_fd, args, sizeof(args)) < 0) - err(1, "Writing to /dev/lguest"); -} -/*:*/ - -/*L:200 - * Device Handling. - * - * When the Guest gives us a buffer, it sends an array of addresses and sizes. - * We need to make sure it's not trying to reach into the Launcher itself, so - * we have a convenient routine which checks it and exits with an error message - * if something funny is going on: - */ -static void *_check_pointer(unsigned long addr, unsigned int size, - unsigned int line) -{ - /* - * Check if the requested address and size exceeds the allocated memory, - * or addr + size wraps around. - */ - if ((addr + size) > guest_limit || (addr + size) < addr) - errx(1, "%s:%i: Invalid address %#lx", __FILE__, line, addr); - /* - * We return a pointer for the caller's convenience, now we know it's - * safe to use. - */ - return from_guest_phys(addr); -} -/* A macro which transparently hands the line number to the real function. */ -#define check_pointer(addr,size) _check_pointer(addr, size, __LINE__) - -/* - * Each buffer in the virtqueues is actually a chain of descriptors. This - * function returns the next descriptor in the chain, or vq->vring.num if we're - * at the end. - */ -static unsigned next_desc(struct vring_desc *desc, - unsigned int i, unsigned int max) -{ - unsigned int next; - - /* If this descriptor says it doesn't chain, we're done. */ - if (!(desc[i].flags & VRING_DESC_F_NEXT)) - return max; - - /* Check they're not leading us off end of descriptors. */ - next = desc[i].next; - /* Make sure compiler knows to grab that: we don't want it changing! */ - wmb(); - - if (next >= max) - errx(1, "Desc next is %u", next); - - return next; -} - -/* - * This actually sends the interrupt for this virtqueue, if we've used a - * buffer. - */ -static void trigger_irq(struct virtqueue *vq) -{ - unsigned long buf[] = { LHREQ_IRQ, vq->config.irq }; - - /* Don't inform them if nothing used. */ - if (!vq->pending_used) - return; - vq->pending_used = 0; - - /* If they don't want an interrupt, don't send one... */ - if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) { - /* ... unless they've asked us to force one on empty. */ - if (!vq->dev->irq_on_empty - || lg_last_avail(vq) != vq->vring.avail->idx) - return; - } - - /* Send the Guest an interrupt tell them we used something up. */ - if (write(lguest_fd, buf, sizeof(buf)) != 0) - err(1, "Triggering irq %i", vq->config.irq); -} - -/* - * This looks in the virtqueue for the first available buffer, and converts - * it to an iovec for convenient access. Since descriptors consist of some - * number of output then some number of input descriptors, it's actually two - * iovecs, but we pack them into one and note how many of each there were. - * - * This function waits if necessary, and returns the descriptor number found. - */ -static unsigned wait_for_vq_desc(struct virtqueue *vq, - struct iovec iov[], - unsigned int *out_num, unsigned int *in_num) -{ - unsigned int i, head, max; - struct vring_desc *desc; - u16 last_avail = lg_last_avail(vq); - - /* There's nothing available? */ - while (last_avail == vq->vring.avail->idx) { - u64 event; - - /* - * Since we're about to sleep, now is a good time to tell the - * Guest about what we've used up to now. - */ - trigger_irq(vq); - - /* OK, now we need to know about added descriptors. */ - vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY; - - /* - * They could have slipped one in as we were doing that: make - * sure it's written, then check again. - */ - mb(); - if (last_avail != vq->vring.avail->idx) { - vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY; - break; - } - - /* Nothing new? Wait for eventfd to tell us they refilled. */ - if (read(vq->eventfd, &event, sizeof(event)) != sizeof(event)) - errx(1, "Event read failed?"); - - /* We don't need to be notified again. */ - vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY; - } - - /* Check it isn't doing very strange things with descriptor numbers. */ - if ((u16)(vq->vring.avail->idx - last_avail) > vq->vring.num) - errx(1, "Guest moved used index from %u to %u", - last_avail, vq->vring.avail->idx); - - /* - * Grab the next descriptor number they're advertising, and increment - * the index we've seen. - */ - head = vq->vring.avail->ring[last_avail % vq->vring.num]; - lg_last_avail(vq)++; - - /* If their number is silly, that's a fatal mistake. */ - if (head >= vq->vring.num) - errx(1, "Guest says index %u is available", head); - - /* When we start there are none of either input nor output. */ - *out_num = *in_num = 0; - - max = vq->vring.num; - desc = vq->vring.desc; - i = head; - - /* - * If this is an indirect entry, then this buffer contains a descriptor - * table which we handle as if it's any normal descriptor chain. - */ - if (desc[i].flags & VRING_DESC_F_INDIRECT) { - if (desc[i].len % sizeof(struct vring_desc)) - errx(1, "Invalid size for indirect buffer table"); - - max = desc[i].len / sizeof(struct vring_desc); - desc = check_pointer(desc[i].addr, desc[i].len); - i = 0; - } - - do { - /* Grab the first descriptor, and check it's OK. */ - iov[*out_num + *in_num].iov_len = desc[i].len; - iov[*out_num + *in_num].iov_base - = check_pointer(desc[i].addr, desc[i].len); - /* If this is an input descriptor, increment that count. */ - if (desc[i].flags & VRING_DESC_F_WRITE) - (*in_num)++; - else { - /* - * If it's an output descriptor, they're all supposed - * to come before any input descriptors. - */ - if (*in_num) - errx(1, "Descriptor has out after in"); - (*out_num)++; - } - - /* If we've got too many, that implies a descriptor loop. */ - if (*out_num + *in_num > max) - errx(1, "Looped descriptor"); - } while ((i = next_desc(desc, i, max)) != max); - - return head; -} - -/* - * After we've used one of their buffers, we tell the Guest about it. Sometime - * later we'll want to send them an interrupt using trigger_irq(); note that - * wait_for_vq_desc() does that for us if it has to wait. - */ -static void add_used(struct virtqueue *vq, unsigned int head, int len) -{ - struct vring_used_elem *used; - - /* - * The virtqueue contains a ring of used buffers. Get a pointer to the - * next entry in that used ring. - */ - used = &vq->vring.used->ring[vq->vring.used->idx % vq->vring.num]; - used->id = head; - used->len = len; - /* Make sure buffer is written before we update index. */ - wmb(); - vq->vring.used->idx++; - vq->pending_used++; -} - -/* And here's the combo meal deal. Supersize me! */ -static void add_used_and_trigger(struct virtqueue *vq, unsigned head, int len) -{ - add_used(vq, head, len); - trigger_irq(vq); -} - -/* - * The Console - * - * We associate some data with the console for our exit hack. - */ -struct console_abort { - /* How many times have they hit ^C? */ - int count; - /* When did they start? */ - struct timeval start; -}; - -/* This is the routine which handles console input (ie. stdin). */ -static void console_input(struct virtqueue *vq) -{ - int len; - unsigned int head, in_num, out_num; - struct console_abort *abort = vq->dev->priv; - struct iovec iov[vq->vring.num]; - - /* Make sure there's a descriptor available. */ - head = wait_for_vq_desc(vq, iov, &out_num, &in_num); - if (out_num) - errx(1, "Output buffers in console in queue?"); - - /* Read into it. This is where we usually wait. */ - len = readv(STDIN_FILENO, iov, in_num); - if (len <= 0) { - /* Ran out of input? */ - warnx("Failed to get console input, ignoring console."); - /* - * For simplicity, dying threads kill the whole Launcher. So - * just nap here. - */ - for (;;) - pause(); - } - - /* Tell the Guest we used a buffer. */ - add_used_and_trigger(vq, head, len); - - /* - * Three ^C within one second? Exit. - * - * This is such a hack, but works surprisingly well. Each ^C has to - * be in a buffer by itself, so they can't be too fast. But we check - * that we get three within about a second, so they can't be too - * slow. - */ - if (len != 1 || ((char *)iov[0].iov_base)[0] != 3) { - abort->count = 0; - return; - } - - abort->count++; - if (abort->count == 1) - gettimeofday(&abort->start, NULL); - else if (abort->count == 3) { - struct timeval now; - gettimeofday(&now, NULL); - /* Kill all Launcher processes with SIGINT, like normal ^C */ - if (now.tv_sec <= abort->start.tv_sec+1) - kill(0, SIGINT); - abort->count = 0; - } -} - -/* This is the routine which handles console output (ie. stdout). */ -static void console_output(struct virtqueue *vq) -{ - unsigned int head, out, in; - struct iovec iov[vq->vring.num]; - - /* We usually wait in here, for the Guest to give us something. */ - head = wait_for_vq_desc(vq, iov, &out, &in); - if (in) - errx(1, "Input buffers in console output queue?"); - - /* writev can return a partial write, so we loop here. */ - while (!iov_empty(iov, out)) { - int len = writev(STDOUT_FILENO, iov, out); - if (len <= 0) - err(1, "Write to stdout gave %i", len); - iov_consume(iov, out, len); - } - - /* - * We're finished with that buffer: if we're going to sleep, - * wait_for_vq_desc() will prod the Guest with an interrupt. - */ - add_used(vq, head, 0); -} - -/* - * The Network - * - * Handling output for network is also simple: we get all the output buffers - * and write them to /dev/net/tun. - */ -struct net_info { - int tunfd; -}; - -static void net_output(struct virtqueue *vq) -{ - struct net_info *net_info = vq->dev->priv; - unsigned int head, out, in; - struct iovec iov[vq->vring.num]; - - /* We usually wait in here for the Guest to give us a packet. */ - head = wait_for_vq_desc(vq, iov, &out, &in); - if (in) - errx(1, "Input buffers in net output queue?"); - /* - * Send the whole thing through to /dev/net/tun. It expects the exact - * same format: what a coincidence! - */ - if (writev(net_info->tunfd, iov, out) < 0) - errx(1, "Write to tun failed?"); - - /* - * Done with that one; wait_for_vq_desc() will send the interrupt if - * all packets are processed. - */ - add_used(vq, head, 0); -} - -/* - * Handling network input is a bit trickier, because I've tried to optimize it. - * - * First we have a helper routine which tells is if from this file descriptor - * (ie. the /dev/net/tun device) will block: - */ -static bool will_block(int fd) -{ - fd_set fdset; - struct timeval zero = { 0, 0 }; - FD_ZERO(&fdset); - FD_SET(fd, &fdset); - return select(fd+1, &fdset, NULL, NULL, &zero) != 1; -} - -/* - * This handles packets coming in from the tun device to our Guest. Like all - * service routines, it gets called again as soon as it returns, so you don't - * see a while(1) loop here. - */ -static void net_input(struct virtqueue *vq) -{ - int len; - unsigned int head, out, in; - struct iovec iov[vq->vring.num]; - struct net_info *net_info = vq->dev->priv; - - /* - * Get a descriptor to write an incoming packet into. This will also - * send an interrupt if they're out of descriptors. - */ - head = wait_for_vq_desc(vq, iov, &out, &in); - if (out) - errx(1, "Output buffers in net input queue?"); - - /* - * If it looks like we'll block reading from the tun device, send them - * an interrupt. - */ - if (vq->pending_used && will_block(net_info->tunfd)) - trigger_irq(vq); - - /* - * Read in the packet. This is where we normally wait (when there's no - * incoming network traffic). - */ - len = readv(net_info->tunfd, iov, in); - if (len <= 0) - err(1, "Failed to read from tun."); - - /* - * Mark that packet buffer as used, but don't interrupt here. We want - * to wait until we've done as much work as we can. - */ - add_used(vq, head, len); -} -/*:*/ - -/* This is the helper to create threads: run the service routine in a loop. */ -static int do_thread(void *_vq) -{ - struct virtqueue *vq = _vq; - - for (;;) - vq->service(vq); - return 0; -} - -/* - * When a child dies, we kill our entire process group with SIGTERM. This - * also has the side effect that the shell restores the console for us! - */ -static void kill_launcher(int signal) -{ - kill(0, SIGTERM); -} - -static void reset_device(struct device *dev) -{ - struct virtqueue *vq; - - verbose("Resetting device %s\n", dev->name); - - /* Clear any features they've acked. */ - memset(get_feature_bits(dev) + dev->feature_len, 0, dev->feature_len); - - /* We're going to be explicitly killing threads, so ignore them. */ - signal(SIGCHLD, SIG_IGN); - - /* Zero out the virtqueues, get rid of their threads */ - for (vq = dev->vq; vq; vq = vq->next) { - if (vq->thread != (pid_t)-1) { - kill(vq->thread, SIGTERM); - waitpid(vq->thread, NULL, 0); - vq->thread = (pid_t)-1; - } - memset(vq->vring.desc, 0, - vring_size(vq->config.num, LGUEST_VRING_ALIGN)); - lg_last_avail(vq) = 0; - } - dev->running = false; - - /* Now we care if threads die. */ - signal(SIGCHLD, (void *)kill_launcher); -} - -/*L:216 - * This actually creates the thread which services the virtqueue for a device. - */ -static void create_thread(struct virtqueue *vq) -{ - /* - * Create stack for thread. Since the stack grows upwards, we point - * the stack pointer to the end of this region. - */ - char *stack = malloc(32768); - unsigned long args[] = { LHREQ_EVENTFD, - vq->config.pfn*getpagesize(), 0 }; - - /* Create a zero-initialized eventfd. */ - vq->eventfd = eventfd(0, 0); - if (vq->eventfd < 0) - err(1, "Creating eventfd"); - args[2] = vq->eventfd; - - /* - * Attach an eventfd to this virtqueue: it will go off when the Guest - * does an LHCALL_NOTIFY for this vq. - */ - if (write(lguest_fd, &args, sizeof(args)) != 0) - err(1, "Attaching eventfd"); - - /* - * CLONE_VM: because it has to access the Guest memory, and SIGCHLD so - * we get a signal if it dies. - */ - vq->thread = clone(do_thread, stack + 32768, CLONE_VM | SIGCHLD, vq); - if (vq->thread == (pid_t)-1) - err(1, "Creating clone"); - - /* We close our local copy now the child has it. */ - close(vq->eventfd); -} - -static bool accepted_feature(struct device *dev, unsigned int bit) -{ - const u8 *features = get_feature_bits(dev) + dev->feature_len; - - if (dev->feature_len < bit / CHAR_BIT) - return false; - return features[bit / CHAR_BIT] & (1 << (bit % CHAR_BIT)); -} - -static void start_device(struct device *dev) -{ - unsigned int i; - struct virtqueue *vq; - - verbose("Device %s OK: offered", dev->name); - for (i = 0; i < dev->feature_len; i++) - verbose(" %02x", get_feature_bits(dev)[i]); - verbose(", accepted"); - for (i = 0; i < dev->feature_len; i++) - verbose(" %02x", get_feature_bits(dev) - [dev->feature_len+i]); - - dev->irq_on_empty = accepted_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY); - - for (vq = dev->vq; vq; vq = vq->next) { - if (vq->service) - create_thread(vq); - } - dev->running = true; -} - -static void cleanup_devices(void) -{ - struct device *dev; - - for (dev = devices.dev; dev; dev = dev->next) - reset_device(dev); - - /* If we saved off the original terminal settings, restore them now. */ - if (orig_term.c_lflag & (ISIG|ICANON|ECHO)) - tcsetattr(STDIN_FILENO, TCSANOW, &orig_term); -} - -/* When the Guest tells us they updated the status field, we handle it. */ -static void update_device_status(struct device *dev) -{ - /* A zero status is a reset, otherwise it's a set of flags. */ - if (dev->desc->status == 0) - reset_device(dev); - else if (dev->desc->status & VIRTIO_CONFIG_S_FAILED) { - warnx("Device %s configuration FAILED", dev->name); - if (dev->running) - reset_device(dev); - } else if (dev->desc->status & VIRTIO_CONFIG_S_DRIVER_OK) { - if (!dev->running) - start_device(dev); - } -} - -/*L:215 - * This is the generic routine we call when the Guest uses LHCALL_NOTIFY. In - * particular, it's used to notify us of device status changes during boot. - */ -static void handle_output(unsigned long addr) -{ - struct device *i; - - /* Check each device. */ - for (i = devices.dev; i; i = i->next) { - struct virtqueue *vq; - - /* - * Notifications to device descriptors mean they updated the - * device status. - */ - if (from_guest_phys(addr) == i->desc) { - update_device_status(i); - return; - } - - /* - * Devices *can* be used before status is set to DRIVER_OK. - * The original plan was that they would never do this: they - * would always finish setting up their status bits before - * actually touching the virtqueues. In practice, we allowed - * them to, and they do (eg. the disk probes for partition - * tables as part of initialization). - * - * If we see this, we start the device: once it's running, we - * expect the device to catch all the notifications. - */ - for (vq = i->vq; vq; vq = vq->next) { - if (addr != vq->config.pfn*getpagesize()) - continue; - if (i->running) - errx(1, "Notification on running %s", i->name); - /* This just calls create_thread() for each virtqueue */ - start_device(i); - return; - } - } - - /* - * Early console write is done using notify on a nul-terminated string - * in Guest memory. It's also great for hacking debugging messages - * into a Guest. - */ - if (addr >= guest_limit) - errx(1, "Bad NOTIFY %#lx", addr); - - write(STDOUT_FILENO, from_guest_phys(addr), - strnlen(from_guest_phys(addr), guest_limit - addr)); -} - -/*L:190 - * Device Setup - * - * All devices need a descriptor so the Guest knows it exists, and a "struct - * device" so the Launcher can keep track of it. We have common helper - * routines to allocate and manage them. - */ - -/* - * The layout of the device page is a "struct lguest_device_desc" followed by a - * number of virtqueue descriptors, then two sets of feature bits, then an - * array of configuration bytes. This routine returns the configuration - * pointer. - */ -static u8 *device_config(const struct device *dev) -{ - return (void *)(dev->desc + 1) - + dev->num_vq * sizeof(struct lguest_vqconfig) - + dev->feature_len * 2; -} - -/* - * This routine allocates a new "struct lguest_device_desc" from descriptor - * table page just above the Guest's normal memory. It returns a pointer to - * that descriptor. - */ -static struct lguest_device_desc *new_dev_desc(u16 type) -{ - struct lguest_device_desc d = { .type = type }; - void *p; - - /* Figure out where the next device config is, based on the last one. */ - if (devices.lastdev) - p = device_config(devices.lastdev) - + devices.lastdev->desc->config_len; - else - p = devices.descpage; - - /* We only have one page for all the descriptors. */ - if (p + sizeof(d) > (void *)devices.descpage + getpagesize()) - errx(1, "Too many devices"); - - /* p might not be aligned, so we memcpy in. */ - return memcpy(p, &d, sizeof(d)); -} - -/* - * Each device descriptor is followed by the description of its virtqueues. We - * specify how many descriptors the virtqueue is to have. - */ -static void add_virtqueue(struct device *dev, unsigned int num_descs, - void (*service)(struct virtqueue *)) -{ - unsigned int pages; - struct virtqueue **i, *vq = malloc(sizeof(*vq)); - void *p; - - /* First we need some memory for this virtqueue. */ - pages = (vring_size(num_descs, LGUEST_VRING_ALIGN) + getpagesize() - 1) - / getpagesize(); - p = get_pages(pages); - - /* Initialize the virtqueue */ - vq->next = NULL; - vq->last_avail_idx = 0; - vq->dev = dev; - - /* - * This is the routine the service thread will run, and its Process ID - * once it's running. - */ - vq->service = service; - vq->thread = (pid_t)-1; - - /* Initialize the configuration. */ - vq->config.num = num_descs; - vq->config.irq = devices.next_irq++; - vq->config.pfn = to_guest_phys(p) / getpagesize(); - - /* Initialize the vring. */ - vring_init(&vq->vring, num_descs, p, LGUEST_VRING_ALIGN); - - /* - * Append virtqueue to this device's descriptor. We use - * device_config() to get the end of the device's current virtqueues; - * we check that we haven't added any config or feature information - * yet, otherwise we'd be overwriting them. - */ - assert(dev->desc->config_len == 0 && dev->desc->feature_len == 0); - memcpy(device_config(dev), &vq->config, sizeof(vq->config)); - dev->num_vq++; - dev->desc->num_vq++; - - verbose("Virtqueue page %#lx\n", to_guest_phys(p)); - - /* - * Add to tail of list, so dev->vq is first vq, dev->vq->next is - * second. - */ - for (i = &dev->vq; *i; i = &(*i)->next); - *i = vq; -} - -/* - * The first half of the feature bitmask is for us to advertise features. The - * second half is for the Guest to accept features. - */ -static void add_feature(struct device *dev, unsigned bit) -{ - u8 *features = get_feature_bits(dev); - - /* We can't extend the feature bits once we've added config bytes */ - if (dev->desc->feature_len <= bit / CHAR_BIT) { - assert(dev->desc->config_len == 0); - dev->feature_len = dev->desc->feature_len = (bit/CHAR_BIT) + 1; - } - - features[bit / CHAR_BIT] |= (1 << (bit % CHAR_BIT)); -} - -/* - * This routine sets the configuration fields for an existing device's - * descriptor. It only works for the last device, but that's OK because that's - * how we use it. - */ -static void set_config(struct device *dev, unsigned len, const void *conf) -{ - /* Check we haven't overflowed our single page. */ - if (device_config(dev) + len > devices.descpage + getpagesize()) - errx(1, "Too many devices"); - - /* Copy in the config information, and store the length. */ - memcpy(device_config(dev), conf, len); - dev->desc->config_len = len; - - /* Size must fit in config_len field (8 bits)! */ - assert(dev->desc->config_len == len); -} - -/* - * This routine does all the creation and setup of a new device, including - * calling new_dev_desc() to allocate the descriptor and device memory. We - * don't actually start the service threads until later. - * - * See what I mean about userspace being boring? - */ -static struct device *new_device(const char *name, u16 type) -{ - struct device *dev = malloc(sizeof(*dev)); - - /* Now we populate the fields one at a time. */ - dev->desc = new_dev_desc(type); - dev->name = name; - dev->vq = NULL; - dev->feature_len = 0; - dev->num_vq = 0; - dev->running = false; - - /* - * Append to device list. Prepending to a single-linked list is - * easier, but the user expects the devices to be arranged on the bus - * in command-line order. The first network device on the command line - * is eth0, the first block device /dev/vda, etc. - */ - if (devices.lastdev) - devices.lastdev->next = dev; - else - devices.dev = dev; - devices.lastdev = dev; - - return dev; -} - -/* - * Our first setup routine is the console. It's a fairly simple device, but - * UNIX tty handling makes it uglier than it could be. - */ -static void setup_console(void) -{ - struct device *dev; - - /* If we can save the initial standard input settings... */ - if (tcgetattr(STDIN_FILENO, &orig_term) == 0) { - struct termios term = orig_term; - /* - * Then we turn off echo, line buffering and ^C etc: We want a - * raw input stream to the Guest. - */ - term.c_lflag &= ~(ISIG|ICANON|ECHO); - tcsetattr(STDIN_FILENO, TCSANOW, &term); - } - - dev = new_device("console", VIRTIO_ID_CONSOLE); - - /* We store the console state in dev->priv, and initialize it. */ - dev->priv = malloc(sizeof(struct console_abort)); - ((struct console_abort *)dev->priv)->count = 0; - - /* - * The console needs two virtqueues: the input then the output. When - * they put something the input queue, we make sure we're listening to - * stdin. When they put something in the output queue, we write it to - * stdout. - */ - add_virtqueue(dev, VIRTQUEUE_NUM, console_input); - add_virtqueue(dev, VIRTQUEUE_NUM, console_output); - - verbose("device %u: console\n", ++devices.device_num); -} -/*:*/ - -/*M:010 - * Inter-guest networking is an interesting area. Simplest is to have a - * --sharenet= option which opens or creates a named pipe. This can be - * used to send packets to another guest in a 1:1 manner. - * - * More sopisticated is to use one of the tools developed for project like UML - * to do networking. - * - * Faster is to do virtio bonding in kernel. Doing this 1:1 would be - * completely generic ("here's my vring, attach to your vring") and would work - * for any traffic. Of course, namespace and permissions issues need to be - * dealt with. A more sophisticated "multi-channel" virtio_net.c could hide - * multiple inter-guest channels behind one interface, although it would - * require some manner of hotplugging new virtio channels. - * - * Finally, we could implement a virtio network switch in the kernel. -:*/ - -static u32 str2ip(const char *ipaddr) -{ - unsigned int b[4]; - - if (sscanf(ipaddr, "%u.%u.%u.%u", &b[0], &b[1], &b[2], &b[3]) != 4) - errx(1, "Failed to parse IP address '%s'", ipaddr); - return (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3]; -} - -static void str2mac(const char *macaddr, unsigned char mac[6]) -{ - unsigned int m[6]; - if (sscanf(macaddr, "%02x:%02x:%02x:%02x:%02x:%02x", - &m[0], &m[1], &m[2], &m[3], &m[4], &m[5]) != 6) - errx(1, "Failed to parse mac address '%s'", macaddr); - mac[0] = m[0]; - mac[1] = m[1]; - mac[2] = m[2]; - mac[3] = m[3]; - mac[4] = m[4]; - mac[5] = m[5]; -} - -/* - * This code is "adapted" from libbridge: it attaches the Host end of the - * network device to the bridge device specified by the command line. - * - * This is yet another James Morris contribution (I'm an IP-level guy, so I - * dislike bridging), and I just try not to break it. - */ -static void add_to_bridge(int fd, const char *if_name, const char *br_name) -{ - int ifidx; - struct ifreq ifr; - - if (!*br_name) - errx(1, "must specify bridge name"); - - ifidx = if_nametoindex(if_name); - if (!ifidx) - errx(1, "interface %s does not exist!", if_name); - - strncpy(ifr.ifr_name, br_name, IFNAMSIZ); - ifr.ifr_name[IFNAMSIZ-1] = '\0'; - ifr.ifr_ifindex = ifidx; - if (ioctl(fd, SIOCBRADDIF, &ifr) < 0) - err(1, "can't add %s to bridge %s", if_name, br_name); -} - -/* - * This sets up the Host end of the network device with an IP address, brings - * it up so packets will flow, the copies the MAC address into the hwaddr - * pointer. - */ -static void configure_device(int fd, const char *tapif, u32 ipaddr) -{ - struct ifreq ifr; - struct sockaddr_in sin; - - memset(&ifr, 0, sizeof(ifr)); - strcpy(ifr.ifr_name, tapif); - - /* Don't read these incantations. Just cut & paste them like I did! */ - sin.sin_family = AF_INET; - sin.sin_addr.s_addr = htonl(ipaddr); - memcpy(&ifr.ifr_addr, &sin, sizeof(sin)); - if (ioctl(fd, SIOCSIFADDR, &ifr) != 0) - err(1, "Setting %s interface address", tapif); - ifr.ifr_flags = IFF_UP; - if (ioctl(fd, SIOCSIFFLAGS, &ifr) != 0) - err(1, "Bringing interface %s up", tapif); -} - -static int get_tun_device(char tapif[IFNAMSIZ]) -{ - struct ifreq ifr; - int netfd; - - /* Start with this zeroed. Messy but sure. */ - memset(&ifr, 0, sizeof(ifr)); - - /* - * We open the /dev/net/tun device and tell it we want a tap device. A - * tap device is like a tun device, only somehow different. To tell - * the truth, I completely blundered my way through this code, but it - * works now! - */ - netfd = open_or_die("/dev/net/tun", O_RDWR); - ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; - strcpy(ifr.ifr_name, "tap%d"); - if (ioctl(netfd, TUNSETIFF, &ifr) != 0) - err(1, "configuring /dev/net/tun"); - - if (ioctl(netfd, TUNSETOFFLOAD, - TUN_F_CSUM|TUN_F_TSO4|TUN_F_TSO6|TUN_F_TSO_ECN) != 0) - err(1, "Could not set features for tun device"); - - /* - * We don't need checksums calculated for packets coming in this - * device: trust us! - */ - ioctl(netfd, TUNSETNOCSUM, 1); - - memcpy(tapif, ifr.ifr_name, IFNAMSIZ); - return netfd; -} - -/*L:195 - * Our network is a Host<->Guest network. This can either use bridging or - * routing, but the principle is the same: it uses the "tun" device to inject - * packets into the Host as if they came in from a normal network card. We - * just shunt packets between the Guest and the tun device. - */ -static void setup_tun_net(char *arg) -{ - struct device *dev; - struct net_info *net_info = malloc(sizeof(*net_info)); - int ipfd; - u32 ip = INADDR_ANY; - bool bridging = false; - char tapif[IFNAMSIZ], *p; - struct virtio_net_config conf; - - net_info->tunfd = get_tun_device(tapif); - - /* First we create a new network device. */ - dev = new_device("net", VIRTIO_ID_NET); - dev->priv = net_info; - - /* Network devices need a recv and a send queue, just like console. */ - add_virtqueue(dev, VIRTQUEUE_NUM, net_input); - add_virtqueue(dev, VIRTQUEUE_NUM, net_output); - - /* - * We need a socket to perform the magic network ioctls to bring up the - * tap interface, connect to the bridge etc. Any socket will do! - */ - ipfd = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); - if (ipfd < 0) - err(1, "opening IP socket"); - - /* If the command line was --tunnet=bridge: do bridging. */ - if (!strncmp(BRIDGE_PFX, arg, strlen(BRIDGE_PFX))) { - arg += strlen(BRIDGE_PFX); - bridging = true; - } - - /* A mac address may follow the bridge name or IP address */ - p = strchr(arg, ':'); - if (p) { - str2mac(p+1, conf.mac); - add_feature(dev, VIRTIO_NET_F_MAC); - *p = '\0'; - } - - /* arg is now either an IP address or a bridge name */ - if (bridging) - add_to_bridge(ipfd, tapif, arg); - else - ip = str2ip(arg); - - /* Set up the tun device. */ - configure_device(ipfd, tapif, ip); - - add_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY); - /* Expect Guest to handle everything except UFO */ - add_feature(dev, VIRTIO_NET_F_CSUM); - add_feature(dev, VIRTIO_NET_F_GUEST_CSUM); - add_feature(dev, VIRTIO_NET_F_GUEST_TSO4); - add_feature(dev, VIRTIO_NET_F_GUEST_TSO6); - add_feature(dev, VIRTIO_NET_F_GUEST_ECN); - add_feature(dev, VIRTIO_NET_F_HOST_TSO4); - add_feature(dev, VIRTIO_NET_F_HOST_TSO6); - add_feature(dev, VIRTIO_NET_F_HOST_ECN); - /* We handle indirect ring entries */ - add_feature(dev, VIRTIO_RING_F_INDIRECT_DESC); - set_config(dev, sizeof(conf), &conf); - - /* We don't need the socket any more; setup is done. */ - close(ipfd); - - devices.device_num++; - - if (bridging) - verbose("device %u: tun %s attached to bridge: %s\n", - devices.device_num, tapif, arg); - else - verbose("device %u: tun %s: %s\n", - devices.device_num, tapif, arg); -} -/*:*/ - -/* This hangs off device->priv. */ -struct vblk_info { - /* The size of the file. */ - off64_t len; - - /* The file descriptor for the file. */ - int fd; - -}; - -/*L:210 - * The Disk - * - * The disk only has one virtqueue, so it only has one thread. It is really - * simple: the Guest asks for a block number and we read or write that position - * in the file. - * - * Before we serviced each virtqueue in a separate thread, that was unacceptably - * slow: the Guest waits until the read is finished before running anything - * else, even if it could have been doing useful work. - * - * We could have used async I/O, except it's reputed to suck so hard that - * characters actually go missing from your code when you try to use it. - */ -static void blk_request(struct virtqueue *vq) -{ - struct vblk_info *vblk = vq->dev->priv; - unsigned int head, out_num, in_num, wlen; - int ret; - u8 *in; - struct virtio_blk_outhdr *out; - struct iovec iov[vq->vring.num]; - off64_t off; - - /* - * Get the next request, where we normally wait. It triggers the - * interrupt to acknowledge previously serviced requests (if any). - */ - head = wait_for_vq_desc(vq, iov, &out_num, &in_num); - - /* - * Every block request should contain at least one output buffer - * (detailing the location on disk and the type of request) and one - * input buffer (to hold the result). - */ - if (out_num == 0 || in_num == 0) - errx(1, "Bad virtblk cmd %u out=%u in=%u", - head, out_num, in_num); - - out = convert(&iov[0], struct virtio_blk_outhdr); - in = convert(&iov[out_num+in_num-1], u8); - /* - * For historical reasons, block operations are expressed in 512 byte - * "sectors". - */ - off = out->sector * 512; - - /* - * In general the virtio block driver is allowed to try SCSI commands. - * It'd be nice if we supported eject, for example, but we don't. - */ - if (out->type & VIRTIO_BLK_T_SCSI_CMD) { - fprintf(stderr, "Scsi commands unsupported\n"); - *in = VIRTIO_BLK_S_UNSUPP; - wlen = sizeof(*in); - } else if (out->type & VIRTIO_BLK_T_OUT) { - /* - * Write - * - * Move to the right location in the block file. This can fail - * if they try to write past end. - */ - if (lseek64(vblk->fd, off, SEEK_SET) != off) - err(1, "Bad seek to sector %llu", out->sector); - - ret = writev(vblk->fd, iov+1, out_num-1); - verbose("WRITE to sector %llu: %i\n", out->sector, ret); - - /* - * Grr... Now we know how long the descriptor they sent was, we - * make sure they didn't try to write over the end of the block - * file (possibly extending it). - */ - if (ret > 0 && off + ret > vblk->len) { - /* Trim it back to the correct length */ - ftruncate64(vblk->fd, vblk->len); - /* Die, bad Guest, die. */ - errx(1, "Write past end %llu+%u", off, ret); - } - - wlen = sizeof(*in); - *in = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR); - } else if (out->type & VIRTIO_BLK_T_FLUSH) { - /* Flush */ - ret = fdatasync(vblk->fd); - verbose("FLUSH fdatasync: %i\n", ret); - wlen = sizeof(*in); - *in = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR); - } else { - /* - * Read - * - * Move to the right location in the block file. This can fail - * if they try to read past end. - */ - if (lseek64(vblk->fd, off, SEEK_SET) != off) - err(1, "Bad seek to sector %llu", out->sector); - - ret = readv(vblk->fd, iov+1, in_num-1); - verbose("READ from sector %llu: %i\n", out->sector, ret); - if (ret >= 0) { - wlen = sizeof(*in) + ret; - *in = VIRTIO_BLK_S_OK; - } else { - wlen = sizeof(*in); - *in = VIRTIO_BLK_S_IOERR; - } - } - - /* Finished that request. */ - add_used(vq, head, wlen); -} - -/*L:198 This actually sets up a virtual block device. */ -static void setup_block_file(const char *filename) -{ - struct device *dev; - struct vblk_info *vblk; - struct virtio_blk_config conf; - - /* Creat the device. */ - dev = new_device("block", VIRTIO_ID_BLOCK); - - /* The device has one virtqueue, where the Guest places requests. */ - add_virtqueue(dev, VIRTQUEUE_NUM, blk_request); - - /* Allocate the room for our own bookkeeping */ - vblk = dev->priv = malloc(sizeof(*vblk)); - - /* First we open the file and store the length. */ - vblk->fd = open_or_die(filename, O_RDWR|O_LARGEFILE); - vblk->len = lseek64(vblk->fd, 0, SEEK_END); - - /* We support FLUSH. */ - add_feature(dev, VIRTIO_BLK_F_FLUSH); - - /* Tell Guest how many sectors this device has. */ - conf.capacity = cpu_to_le64(vblk->len / 512); - - /* - * Tell Guest not to put in too many descriptors at once: two are used - * for the in and out elements. - */ - add_feature(dev, VIRTIO_BLK_F_SEG_MAX); - conf.seg_max = cpu_to_le32(VIRTQUEUE_NUM - 2); - - /* Don't try to put whole struct: we have 8 bit limit. */ - set_config(dev, offsetof(struct virtio_blk_config, geometry), &conf); - - verbose("device %u: virtblock %llu sectors\n", - ++devices.device_num, le64_to_cpu(conf.capacity)); -} - -/*L:211 - * Our random number generator device reads from /dev/random into the Guest's - * input buffers. The usual case is that the Guest doesn't want random numbers - * and so has no buffers although /dev/random is still readable, whereas - * console is the reverse. - * - * The same logic applies, however. - */ -struct rng_info { - int rfd; -}; - -static void rng_input(struct virtqueue *vq) -{ - int len; - unsigned int head, in_num, out_num, totlen = 0; - struct rng_info *rng_info = vq->dev->priv; - struct iovec iov[vq->vring.num]; - - /* First we need a buffer from the Guests's virtqueue. */ - head = wait_for_vq_desc(vq, iov, &out_num, &in_num); - if (out_num) - errx(1, "Output buffers in rng?"); - - /* - * Just like the console write, we loop to cover the whole iovec. - * In this case, short reads actually happen quite a bit. - */ - while (!iov_empty(iov, in_num)) { - len = readv(rng_info->rfd, iov, in_num); - if (len <= 0) - err(1, "Read from /dev/random gave %i", len); - iov_consume(iov, in_num, len); - totlen += len; - } - - /* Tell the Guest about the new input. */ - add_used(vq, head, totlen); -} - -/*L:199 - * This creates a "hardware" random number device for the Guest. - */ -static void setup_rng(void) -{ - struct device *dev; - struct rng_info *rng_info = malloc(sizeof(*rng_info)); - - /* Our device's privat info simply contains the /dev/random fd. */ - rng_info->rfd = open_or_die("/dev/random", O_RDONLY); - - /* Create the new device. */ - dev = new_device("rng", VIRTIO_ID_RNG); - dev->priv = rng_info; - - /* The device has one virtqueue, where the Guest places inbufs. */ - add_virtqueue(dev, VIRTQUEUE_NUM, rng_input); - - verbose("device %u: rng\n", devices.device_num++); -} -/* That's the end of device setup. */ - -/*L:230 Reboot is pretty easy: clean up and exec() the Launcher afresh. */ -static void __attribute__((noreturn)) restart_guest(void) -{ - unsigned int i; - - /* - * Since we don't track all open fds, we simply close everything beyond - * stderr. - */ - for (i = 3; i < FD_SETSIZE; i++) - close(i); - - /* Reset all the devices (kills all threads). */ - cleanup_devices(); - - execv(main_args[0], main_args); - err(1, "Could not exec %s", main_args[0]); -} - -/*L:220 - * Finally we reach the core of the Launcher which runs the Guest, serves - * its input and output, and finally, lays it to rest. - */ -static void __attribute__((noreturn)) run_guest(void) -{ - for (;;) { - unsigned long notify_addr; - int readval; - - /* We read from the /dev/lguest device to run the Guest. */ - readval = pread(lguest_fd, ¬ify_addr, - sizeof(notify_addr), cpu_id); - - /* One unsigned long means the Guest did HCALL_NOTIFY */ - if (readval == sizeof(notify_addr)) { - verbose("Notify on address %#lx\n", notify_addr); - handle_output(notify_addr); - /* ENOENT means the Guest died. Reading tells us why. */ - } else if (errno == ENOENT) { - char reason[1024] = { 0 }; - pread(lguest_fd, reason, sizeof(reason)-1, cpu_id); - errx(1, "%s", reason); - /* ERESTART means that we need to reboot the guest */ - } else if (errno == ERESTART) { - restart_guest(); - /* Anything else means a bug or incompatible change. */ - } else - err(1, "Running guest failed"); - } -} -/*L:240 - * This is the end of the Launcher. The good news: we are over halfway - * through! The bad news: the most fiendish part of the code still lies ahead - * of us. - * - * Are you ready? Take a deep breath and join me in the core of the Host, in - * "make Host". -:*/ - -static struct option opts[] = { - { "verbose", 0, NULL, 'v' }, - { "tunnet", 1, NULL, 't' }, - { "block", 1, NULL, 'b' }, - { "rng", 0, NULL, 'r' }, - { "initrd", 1, NULL, 'i' }, - { "username", 1, NULL, 'u' }, - { "chroot", 1, NULL, 'c' }, - { NULL }, -}; -static void usage(void) -{ - errx(1, "Usage: lguest [--verbose] " - "[--tunnet=(:|bridge::)\n" - "|--block=|--initrd=]...\n" - " vmlinux [args...]"); -} - -/*L:105 The main routine is where the real work begins: */ -int main(int argc, char *argv[]) -{ - /* Memory, code startpoint and size of the (optional) initrd. */ - unsigned long mem = 0, start, initrd_size = 0; - /* Two temporaries. */ - int i, c; - /* The boot information for the Guest. */ - struct boot_params *boot; - /* If they specify an initrd file to load. */ - const char *initrd_name = NULL; - - /* Password structure for initgroups/setres[gu]id */ - struct passwd *user_details = NULL; - - /* Directory to chroot to */ - char *chroot_path = NULL; - - /* Save the args: we "reboot" by execing ourselves again. */ - main_args = argv; - - /* - * First we initialize the device list. We keep a pointer to the last - * device, and the next interrupt number to use for devices (1: - * remember that 0 is used by the timer). - */ - devices.lastdev = NULL; - devices.next_irq = 1; - - /* We're CPU 0. In fact, that's the only CPU possible right now. */ - cpu_id = 0; - - /* - * We need to know how much memory so we can set up the device - * descriptor and memory pages for the devices as we parse the command - * line. So we quickly look through the arguments to find the amount - * of memory now. - */ - for (i = 1; i < argc; i++) { - if (argv[i][0] != '-') { - mem = atoi(argv[i]) * 1024 * 1024; - /* - * We start by mapping anonymous pages over all of - * guest-physical memory range. This fills it with 0, - * and ensures that the Guest won't be killed when it - * tries to access it. - */ - guest_base = map_zeroed_pages(mem / getpagesize() - + DEVICE_PAGES); - guest_limit = mem; - guest_max = mem + DEVICE_PAGES*getpagesize(); - devices.descpage = get_pages(1); - break; - } - } - - /* The options are fairly straight-forward */ - while ((c = getopt_long(argc, argv, "v", opts, NULL)) != EOF) { - switch (c) { - case 'v': - verbose = true; - break; - case 't': - setup_tun_net(optarg); - break; - case 'b': - setup_block_file(optarg); - break; - case 'r': - setup_rng(); - break; - case 'i': - initrd_name = optarg; - break; - case 'u': - user_details = getpwnam(optarg); - if (!user_details) - err(1, "getpwnam failed, incorrect username?"); - break; - case 'c': - chroot_path = optarg; - break; - default: - warnx("Unknown argument %s", argv[optind]); - usage(); - } - } - /* - * After the other arguments we expect memory and kernel image name, - * followed by command line arguments for the kernel. - */ - if (optind + 2 > argc) - usage(); - - verbose("Guest base is at %p\n", guest_base); - - /* We always have a console device */ - setup_console(); - - /* Now we load the kernel */ - start = load_kernel(open_or_die(argv[optind+1], O_RDONLY)); - - /* Boot information is stashed at physical address 0 */ - boot = from_guest_phys(0); - - /* Map the initrd image if requested (at top of physical memory) */ - if (initrd_name) { - initrd_size = load_initrd(initrd_name, mem); - /* - * These are the location in the Linux boot header where the - * start and size of the initrd are expected to be found. - */ - boot->hdr.ramdisk_image = mem - initrd_size; - boot->hdr.ramdisk_size = initrd_size; - /* The bootloader type 0xFF means "unknown"; that's OK. */ - boot->hdr.type_of_loader = 0xFF; - } - - /* - * The Linux boot header contains an "E820" memory map: ours is a - * simple, single region. - */ - boot->e820_entries = 1; - boot->e820_map[0] = ((struct e820entry) { 0, mem, E820_RAM }); - /* - * The boot header contains a command line pointer: we put the command - * line after the boot header. - */ - boot->hdr.cmd_line_ptr = to_guest_phys(boot + 1); - /* We use a simple helper to copy the arguments separated by spaces. */ - concat((char *)(boot + 1), argv+optind+2); - - /* Boot protocol version: 2.07 supports the fields for lguest. */ - boot->hdr.version = 0x207; - - /* The hardware_subarch value of "1" tells the Guest it's an lguest. */ - boot->hdr.hardware_subarch = 1; - - /* Tell the entry path not to try to reload segment registers. */ - boot->hdr.loadflags |= KEEP_SEGMENTS; - - /* - * We tell the kernel to initialize the Guest: this returns the open - * /dev/lguest file descriptor. - */ - tell_kernel(start); - - /* Ensure that we terminate if a device-servicing child dies. */ - signal(SIGCHLD, kill_launcher); - - /* If we exit via err(), this kills all the threads, restores tty. */ - atexit(cleanup_devices); - - /* If requested, chroot to a directory */ - if (chroot_path) { - if (chroot(chroot_path) != 0) - err(1, "chroot(\"%s\") failed", chroot_path); - - if (chdir("/") != 0) - err(1, "chdir(\"/\") failed"); - - verbose("chroot done\n"); - } - - /* If requested, drop privileges */ - if (user_details) { - uid_t u; - gid_t g; - - u = user_details->pw_uid; - g = user_details->pw_gid; - - if (initgroups(user_details->pw_name, g) != 0) - err(1, "initgroups failed"); - - if (setresgid(g, g, g) != 0) - err(1, "setresgid failed"); - - if (setresuid(u, u, u) != 0) - err(1, "setresuid failed"); - - verbose("Dropping privileges completed\n"); - } - - /* Finally, run the Guest. This doesn't return. */ - run_guest(); -} -/*:*/ - -/*M:999 - * Mastery is done: you now know everything I do. - * - * But surely you have seen code, features and bugs in your wanderings which - * you now yearn to attack? That is the real game, and I look forward to you - * patching and forking lguest into the Your-Name-Here-visor. - * - * Farewell, and good coding! - * Rusty Russell. - */ diff --git a/Documentation/lguest/lguest.txt b/Documentation/lguest/lguest.txt deleted file mode 100644 index dad99978a6a8..000000000000 --- a/Documentation/lguest/lguest.txt +++ /dev/null @@ -1,128 +0,0 @@ - __ - (___()'`; Rusty's Remarkably Unreliable Guide to Lguest - /, /` - or, A Young Coder's Illustrated Hypervisor - \\"--\\ http://lguest.ozlabs.org - -Lguest is designed to be a minimal 32-bit x86 hypervisor for the Linux kernel, -for Linux developers and users to experiment with virtualization with the -minimum of complexity. Nonetheless, it should have sufficient features to -make it useful for specific tasks, and, of course, you are encouraged to fork -and enhance it (see drivers/lguest/README). - -Features: - -- Kernel module which runs in a normal kernel. -- Simple I/O model for communication. -- Simple program to create new guests. -- Logo contains cute puppies: http://lguest.ozlabs.org - -Developer features: - -- Fun to hack on. -- No ABI: being tied to a specific kernel anyway, you can change anything. -- Many opportunities for improvement or feature implementation. - -Running Lguest: - -- The easiest way to run lguest is to use same kernel as guest and host. - You can configure them differently, but usually it's easiest not to. - - You will need to configure your kernel with the following options: - - "General setup": - "Prompt for development and/or incomplete code/drivers" = Y - (CONFIG_EXPERIMENTAL=y) - - "Processor type and features": - "Paravirtualized guest support" = Y - "Lguest guest support" = Y - "High Memory Support" = off/4GB - "Alignment value to which kernel should be aligned" = 0x100000 - (CONFIG_PARAVIRT=y, CONFIG_LGUEST_GUEST=y, CONFIG_HIGHMEM64G=n and - CONFIG_PHYSICAL_ALIGN=0x100000) - - "Device Drivers": - "Block devices" - "Virtio block driver (EXPERIMENTAL)" = M/Y - "Network device support" - "Universal TUN/TAP device driver support" = M/Y - "Virtio network driver (EXPERIMENTAL)" = M/Y - (CONFIG_VIRTIO_BLK=m, CONFIG_VIRTIO_NET=m and CONFIG_TUN=m) - - "Virtualization" - "Linux hypervisor example code" = M/Y - (CONFIG_LGUEST=m) - -- A tool called "lguest" is available in this directory: type "make" - to build it. If you didn't build your kernel in-tree, use "make - O=". - -- Create or find a root disk image. There are several useful ones - around, such as the xm-test tiny root image at - http://xm-test.xensource.com/ramdisks/initrd-1.1-i386.img - - For more serious work, I usually use a distribution ISO image and - install it under qemu, then make multiple copies: - - dd if=/dev/zero of=rootfile bs=1M count=2048 - qemu -cdrom image.iso -hda rootfile -net user -net nic -boot d - - Make sure that you install a getty on /dev/hvc0 if you want to log in on the - console! - -- "modprobe lg" if you built it as a module. - -- Run an lguest as root: - - Documentation/lguest/lguest 64 vmlinux --tunnet=192.168.19.1 --block=rootfile root=/dev/vda - - Explanation: - 64: the amount of memory to use, in MB. - - vmlinux: the kernel image found in the top of your build directory. You - can also use a standard bzImage. - - --tunnet=192.168.19.1: configures a "tap" device for networking with this - IP address. - - --block=rootfile: a file or block device which becomes /dev/vda - inside the guest. - - root=/dev/vda: this (and anything else on the command line) are - kernel boot parameters. - -- Configuring networking. I usually have the host masquerade, using - "iptables -t nat -A POSTROUTING -o eth0 -j MASQUERADE" and "echo 1 > - /proc/sys/net/ipv4/ip_forward". In this example, I would configure - eth0 inside the guest at 192.168.19.2. - - Another method is to bridge the tap device to an external interface - using --tunnet=bridge:, and perhaps run dhcp on the guest - to obtain an IP address. The bridge needs to be configured first: - this option simply adds the tap interface to it. - - A simple example on my system: - - ifconfig eth0 0.0.0.0 - brctl addbr lg0 - ifconfig lg0 up - brctl addif lg0 eth0 - dhclient lg0 - - Then use --tunnet=bridge:lg0 when launching the guest. - - See: - - http://www.linuxfoundation.org/collaborate/workgroups/networking/bridge - - for general information on how to get bridging to work. - -- Random number generation. Using the --rng option will provide a - /dev/hwrng in the guest that will read from the host's /dev/random. - Use this option in conjunction with rng-tools (see ../hw_random.txt) - to provide entropy to the guest kernel's /dev/random. - -There is a helpful mailing list at http://ozlabs.org/mailman/listinfo/lguest - -Good luck! -Rusty Russell rusty@rustcorp.com.au. diff --git a/Documentation/lockstat.txt b/Documentation/lockstat.txt index 65f4c795015d..cef00d42ed5b 100644 --- a/Documentation/lockstat.txt +++ b/Documentation/lockstat.txt @@ -12,8 +12,9 @@ Because things like lock contention can severely impact performance. - HOW Lockdep already has hooks in the lock functions and maps lock instances to -lock classes. We build on that. The graph below shows the relation between -the lock functions and the various hooks therein. +lock classes. We build on that (see Documentation/lockdep-design.txt). +The graph below shows the relation between the lock functions and the various +hooks therein. __acquire | @@ -128,6 +129,37 @@ points are the points we're contending with. The integer part of the time values is in us. +Dealing with nested locks, subclasses may appear: + +32............................................................................................................................................................................................... +33 +34 &rq->lock: 13128 13128 0.43 190.53 103881.26 97454 3453404 0.00 401.11 13224683.11 +35 --------- +36 &rq->lock 645 [] task_rq_lock+0x43/0x75 +37 &rq->lock 297 [] try_to_wake_up+0x127/0x25a +38 &rq->lock 360 [] select_task_rq_fair+0x1f0/0x74a +39 &rq->lock 428 [] scheduler_tick+0x46/0x1fb +40 --------- +41 &rq->lock 77 [] task_rq_lock+0x43/0x75 +42 &rq->lock 174 [] try_to_wake_up+0x127/0x25a +43 &rq->lock 4715 [] double_rq_lock+0x42/0x54 +44 &rq->lock 893 [] schedule+0x157/0x7b8 +45 +46............................................................................................................................................................................................... +47 +48 &rq->lock/1: 11526 11488 0.33 388.73 136294.31 21461 38404 0.00 37.93 109388.53 +49 ----------- +50 &rq->lock/1 11526 [] double_rq_lock+0x4f/0x54 +51 ----------- +52 &rq->lock/1 5645 [] double_rq_lock+0x42/0x54 +53 &rq->lock/1 1224 [] schedule+0x157/0x7b8 +54 &rq->lock/1 4336 [] double_rq_lock+0x4f/0x54 +55 &rq->lock/1 181 [] try_to_wake_up+0x127/0x25a + +Line 48 shows statistics for the second subclass (/1) of &rq->lock class +(subclass starts from 0), since in this case, as line 50 suggests, +double_rq_lock actually acquires a nested lock of two spinlocks. + View the top contending locks: # grep : /proc/lock_stat | head @@ -136,7 +168,7 @@ View the top contending locks: dcache_lock: 1037 1161 0.38 45.32 774.51 6611 243371 0.15 306.48 77387.24 &inode->i_mutex: 161 286 18446744073709 62882.54 1244614.55 3653 20598 18446744073709 62318.60 1693822.74 &zone->lru_lock: 94 94 0.53 7.33 92.10 4366 32690 0.29 59.81 16350.06 - &inode->i_data.i_mmap_lock: 79 79 0.40 3.77 53.03 11779 87755 0.28 116.93 29898.44 + &inode->i_data.i_mmap_mutex: 79 79 0.40 3.77 53.03 11779 87755 0.28 116.93 29898.44 &q->__queue_lock: 48 50 0.52 31.62 86.31 774 13131 0.17 113.08 12277.52 &rq->rq_lock_key: 43 47 0.74 68.50 170.63 3706 33929 0.22 107.99 17460.62 &rq->rq_lock_key#2: 39 46 0.75 6.68 49.03 2979 32292 0.17 125.17 17137.63 diff --git a/Documentation/md.txt b/Documentation/md.txt index a81c7b4790f2..f0eee83ff78a 100644 --- a/Documentation/md.txt +++ b/Documentation/md.txt @@ -552,6 +552,16 @@ also have within the array where IO will be blocked. This is currently only supported for raid4/5/6. + sync_min + sync_max + The two values, given as numbers of sectors, indicate a range + within the array where 'check'/'repair' will operate. Must be + a multiple of chunk_size. When it reaches "sync_max" it will + pause, rather than complete. + You can use 'select' or 'poll' on "sync_completed" to wait for + that number to reach sync_max. Then you can either increase + "sync_max", or can write 'idle' to "sync_action". + Each active md device may also have attributes specific to the personality module that manages it. diff --git a/Documentation/mmc/00-INDEX b/Documentation/mmc/00-INDEX index fca586f5b853..93dd7a714075 100644 --- a/Documentation/mmc/00-INDEX +++ b/Documentation/mmc/00-INDEX @@ -2,3 +2,5 @@ - this file mmc-dev-attrs.txt - info on SD and MMC device attributes +mmc-dev-parts.txt + - info on SD and MMC device partitions diff --git a/Documentation/mmc/mmc-dev-attrs.txt b/Documentation/mmc/mmc-dev-attrs.txt index ff2bd685bced..8898a95b41e5 100644 --- a/Documentation/mmc/mmc-dev-attrs.txt +++ b/Documentation/mmc/mmc-dev-attrs.txt @@ -1,3 +1,13 @@ +SD and MMC Block Device Attributes +================================== + +These attributes are defined for the block devices associated with the +SD or MMC device. + +The following attributes are read/write. + + force_ro Enforce read-only access even if write protect switch is off. + SD and MMC Device Attributes ============================ diff --git a/Documentation/mmc/mmc-dev-parts.txt b/Documentation/mmc/mmc-dev-parts.txt new file mode 100644 index 000000000000..2db28b8e662f --- /dev/null +++ b/Documentation/mmc/mmc-dev-parts.txt @@ -0,0 +1,27 @@ +SD and MMC Device Partitions +============================ + +Device partitions are additional logical block devices present on the +SD/MMC device. + +As of this writing, MMC boot partitions as supported and exposed as +/dev/mmcblkXboot0 and /dev/mmcblkXboot1, where X is the index of the +parent /dev/mmcblkX. + +MMC Boot Partitions +=================== + +Read and write access is provided to the two MMC boot partitions. Due to +the sensitive nature of the boot partition contents, which often store +a bootloader or bootloader configuration tables crucial to booting the +platform, write access is disabled by default to reduce the chance of +accidental bricking. + +To enable write access to /dev/mmcblkXbootY, disable the forced read-only +access with: + +echo 0 > /sys/block/mmcblkXbootY/force_ro + +To re-enable read-only access: + +echo 1 > /sys/block/mmcblkXbootY/force_ro diff --git a/Documentation/networking/batman-adv.txt b/Documentation/networking/batman-adv.txt index ee496eb2f4a6..88d4afbdef98 100644 --- a/Documentation/networking/batman-adv.txt +++ b/Documentation/networking/batman-adv.txt @@ -1,4 +1,4 @@ -[state: 27-01-2011] +[state: 17-04-2011] BATMAN-ADV ---------- @@ -19,6 +19,7 @@ duce the overhead to a minimum. It does not depend on any (other) network driver, and can be used on wifi as well as ethernet lan, vpn, etc ... (anything with ethernet-style layer 2). + CONFIGURATION ------------- @@ -160,13 +161,13 @@ face. Each entry can/has to have the following values: -> "TQ mac value" - src mac's link quality towards mac address of a neighbor originator's interface which is being used for routing --> "HNA mac" - HNA announced by source mac +-> "TT mac" - TT announced by source mac -> "PRIMARY" - this is a primary interface -> "SEC mac" - secondary mac address of source (requires preceding PRIMARY) The TQ value has a range from 4 to 255 with 255 being the best. -The HNA entries are showing which hosts are connected to the mesh +The TT entries are showing which hosts are connected to the mesh via bat0 or being bridged into the mesh network. The PRIMARY/SEC values are only applied on primary interfaces @@ -199,7 +200,7 @@ abled during run time. Following log_levels are defined: 0 - All debug output disabled 1 - Enable messages related to routing / flooding / broadcasting -2 - Enable route or hna added / changed / deleted +2 - Enable route or tt entry added / changed / deleted 3 - Enable all messages The debug output can be changed at runtime using the file @@ -207,7 +208,7 @@ The debug output can be changed at runtime using the file # echo 2 > /sys/class/net/bat0/mesh/log_level -will enable debug messages for when routes or HNAs change. +will enable debug messages for when routes or TTs change. BATCTL diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt index e27202bb8d75..675612ff41ae 100644 --- a/Documentation/networking/bonding.txt +++ b/Documentation/networking/bonding.txt @@ -1,7 +1,7 @@ Linux Ethernet Bonding Driver HOWTO - Latest update: 23 September 2009 + Latest update: 27 April 2011 Initial release : Thomas Davis Corrections, HA extensions : 2000/10/03-15 : @@ -585,25 +585,23 @@ mode chosen. num_grat_arp - - Specifies the number of gratuitous ARPs to be issued after a - failover event. One gratuitous ARP is issued immediately after - the failover, subsequent ARPs are sent at a rate of one per link - monitor interval (arp_interval or miimon, whichever is active). - - The valid range is 0 - 255; the default value is 1. This option - affects only the active-backup mode. This option was added for - bonding version 3.3.0. - num_unsol_na - Specifies the number of unsolicited IPv6 Neighbor Advertisements - to be issued after a failover event. One unsolicited NA is issued - immediately after the failover. + Specify the number of peer notifications (gratuitous ARPs and + unsolicited IPv6 Neighbor Advertisements) to be issued after a + failover event. As soon as the link is up on the new slave + (possibly immediately) a peer notification is sent on the + bonding device and each VLAN sub-device. This is repeated at + each link monitor interval (arp_interval or miimon, whichever + is active) if the number is greater than 1. - The valid range is 0 - 255; the default value is 1. This option - affects only the active-backup mode. This option was added for - bonding version 3.4.0. + The valid range is 0 - 255; the default value is 1. These options + affect only the active-backup mode. These options were added for + bonding versions 3.3.0 and 3.4.0 respectively. + + From Linux 2.6.40 and bonding version 3.7.1, these notifications + are generated by the ipv4 and ipv6 code and the numbers of + repetitions cannot be set independently. primary @@ -772,8 +770,17 @@ resend_igmp a failover event. One membership report is issued immediately after the failover, subsequent packets are sent in each 200ms interval. - The valid range is 0 - 255; the default value is 1. This option - was added for bonding version 3.7.0. + The valid range is 0 - 255; the default value is 1. A value of 0 + prevents the IGMP membership report from being issued in response + to the failover event. + + This option is useful for bonding modes balance-rr (0), active-backup + (1), balance-tlb (5) and balance-alb (6), in which a failover can + switch the IGMP traffic from one slave to another. Therefore a fresh + IGMP report must be issued to cause the switch to forward the incoming + IGMP traffic over the newly selected slave. + + This option was added for bonding version 3.7.0. 3. Configuring Bonding Devices ============================== diff --git a/Documentation/networking/dns_resolver.txt b/Documentation/networking/dns_resolver.txt index 04ca06325b08..7f531ad83285 100644 --- a/Documentation/networking/dns_resolver.txt +++ b/Documentation/networking/dns_resolver.txt @@ -139,8 +139,8 @@ the key will be discarded and recreated when the data it holds has expired. dns_query() returns a copy of the value attached to the key, or an error if that is indicated instead. -See for further information about -request-key function. +See for further +information about request-key function. ========= diff --git a/Documentation/networking/igb.txt b/Documentation/networking/igb.txt index 98953c0d5342..9a2a037194a5 100644 --- a/Documentation/networking/igb.txt +++ b/Documentation/networking/igb.txt @@ -93,6 +93,19 @@ Additional Configurations REQUIREMENTS: MSI-X support is required for Multiqueue. If MSI-X is not found, the system will fallback to MSI or to Legacy interrupts. + MAC and VLAN anti-spoofing feature + ---------------------------------- + When a malicious driver attempts to send a spoofed packet, it is dropped by + the hardware and not transmitted. An interrupt is sent to the PF driver + notifying it of the spoof attempt. + + When a spoofed packet is detected the PF driver will send the following + message to the system log (displayed by the "dmesg" command): + + Spoof event(s) detected on VF(n) + + Where n=the VF that attempted to do the spoofing. + Support ======= diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt index 1971bcf48a60..88880839ece4 100644 --- a/Documentation/power/devices.txt +++ b/Documentation/power/devices.txt @@ -279,11 +279,15 @@ When the system goes into the standby or memory sleep state, the phases are: time.) Unlike the other suspend-related phases, during the prepare phase the device tree is traversed top-down. - The prepare phase uses only a bus callback. After the callback method - returns, no new children may be registered below the device. The method - may also prepare the device or driver in some way for the upcoming - system power transition, but it should not put the device into a - low-power state. + In addition to that, if device drivers need to allocate additional + memory to be able to hadle device suspend correctly, that should be + done in the prepare phase. + + After the prepare callback method returns, no new children may be + registered below the device. The method may also prepare the device or + driver in some way for the upcoming system power transition (for + example, by allocating additional memory required for this purpose), but + it should not put the device into a low-power state. 2. The suspend methods should quiesce the device to stop it from performing I/O. They also may save the device registers and put it into the diff --git a/Documentation/power/notifiers.txt b/Documentation/power/notifiers.txt index cf980709122a..c2a4a346c0d9 100644 --- a/Documentation/power/notifiers.txt +++ b/Documentation/power/notifiers.txt @@ -1,46 +1,41 @@ Suspend notifiers - (C) 2007 Rafael J. Wysocki , GPL + (C) 2007-2011 Rafael J. Wysocki , GPL -There are some operations that device drivers may want to carry out in their -.suspend() routines, but shouldn't, because they can cause the hibernation or -suspend to fail. For example, a driver may want to allocate a substantial amount -of memory (like 50 MB) in .suspend(), but that shouldn't be done after the -swsusp's memory shrinker has run. +There are some operations that subsystems or drivers may want to carry out +before hibernation/suspend or after restore/resume, but they require the system +to be fully functional, so the drivers' and subsystems' .suspend() and .resume() +or even .prepare() and .complete() callbacks are not suitable for this purpose. +For example, device drivers may want to upload firmware to their devices after +resume/restore, but they cannot do it by calling request_firmware() from their +.resume() or .complete() routines (user land processes are frozen at these +points). The solution may be to load the firmware into memory before processes +are frozen and upload it from there in the .resume() routine. +A suspend/hibernation notifier may be used for this purpose. -Also, there may be some operations, that subsystems want to carry out before a -hibernation/suspend or after a restore/resume, requiring the system to be fully -functional, so the drivers' .suspend() and .resume() routines are not suitable -for this purpose. For example, device drivers may want to upload firmware to -their devices after a restore from a hibernation image, but they cannot do it by -calling request_firmware() from their .resume() routines (user land processes -are frozen at this point). The solution may be to load the firmware into -memory before processes are frozen and upload it from there in the .resume() -routine. Of course, a hibernation notifier may be used for this purpose. - -The subsystems that have such needs can register suspend notifiers that will be -called upon the following events by the suspend core: +The subsystems or drivers having such needs can register suspend notifiers that +will be called upon the following events by the PM core: PM_HIBERNATION_PREPARE The system is going to hibernate or suspend, tasks will be frozen immediately. PM_POST_HIBERNATION The system memory state has been restored from a - hibernation image or an error occurred during the - hibernation. Device drivers' .resume() callbacks have + hibernation image or an error occurred during + hibernation. Device drivers' restore callbacks have been executed and tasks have been thawed. PM_RESTORE_PREPARE The system is going to restore a hibernation image. - If all goes well the restored kernel will issue a + If all goes well, the restored kernel will issue a PM_POST_HIBERNATION notification. -PM_POST_RESTORE An error occurred during the hibernation restore. - Device drivers' .resume() callbacks have been executed +PM_POST_RESTORE An error occurred during restore from hibernation. + Device drivers' restore callbacks have been executed and tasks have been thawed. -PM_SUSPEND_PREPARE The system is preparing for a suspend. +PM_SUSPEND_PREPARE The system is preparing for suspend. PM_POST_SUSPEND The system has just resumed or an error occurred during - the suspend. Device drivers' .resume() callbacks have - been executed and tasks have been thawed. + suspend. Device drivers' resume callbacks have been + executed and tasks have been thawed. It is generally assumed that whatever the notifiers do for PM_HIBERNATION_PREPARE, should be undone for PM_POST_HIBERNATION. Analogously, diff --git a/Documentation/power/regulator/machine.txt b/Documentation/power/regulator/machine.txt index bdec39b9bd75..b42419b52e44 100644 --- a/Documentation/power/regulator/machine.txt +++ b/Documentation/power/regulator/machine.txt @@ -53,11 +53,11 @@ static struct regulator_init_data regulator1_data = { Regulator-1 supplies power to Regulator-2. This relationship must be registered with the core so that Regulator-1 is also enabled when Consumer A enables its -supply (Regulator-2). The supply regulator is set by the supply_regulator_dev +supply (Regulator-2). The supply regulator is set by the supply_regulator field below:- static struct regulator_init_data regulator2_data = { - .supply_regulator_dev = &platform_regulator1_device.dev, + .supply_regulator = "regulator_name", .constraints = { .min_uV = 1800000, .max_uV = 2000000, diff --git a/Documentation/printk-formats.txt b/Documentation/printk-formats.txt index 1b5a5ddbc3ef..5df176ed59b8 100644 --- a/Documentation/printk-formats.txt +++ b/Documentation/printk-formats.txt @@ -9,7 +9,121 @@ If variable is of Type, use printk format specifier: size_t %zu or %zx ssize_t %zd or %zx -Raw pointer value SHOULD be printed with %p. +Raw pointer value SHOULD be printed with %p. The kernel supports +the following extended format specifiers for pointer types: + +Symbols/Function Pointers: + + %pF versatile_init+0x0/0x110 + %pf versatile_init + %pS versatile_init+0x0/0x110 + %ps versatile_init + %pB prev_fn_of_versatile_init+0x88/0x88 + + For printing symbols and function pointers. The 'S' and 's' specifiers + result in the symbol name with ('S') or without ('s') offsets. Where + this is used on a kernel without KALLSYMS - the symbol address is + printed instead. + + The 'B' specifier results in the symbol name with offsets and should be + used when printing stack backtraces. The specifier takes into + consideration the effect of compiler optimisations which may occur + when tail-call's are used and marked with the noreturn GCC attribute. + + On ia64, ppc64 and parisc64 architectures function pointers are + actually function descriptors which must first be resolved. The 'F' and + 'f' specifiers perform this resolution and then provide the same + functionality as the 'S' and 's' specifiers. + +Kernel Pointers: + + %pK 0x01234567 or 0x0123456789abcdef + + For printing kernel pointers which should be hidden from unprivileged + users. The behaviour of %pK depends on the kptr_restrict sysctl - see + Documentation/sysctl/kernel.txt for more details. + +Struct Resources: + + %pr [mem 0x60000000-0x6fffffff flags 0x2200] or + [mem 0x0000000060000000-0x000000006fffffff flags 0x2200] + %pR [mem 0x60000000-0x6fffffff pref] or + [mem 0x0000000060000000-0x000000006fffffff pref] + + For printing struct resources. The 'R' and 'r' specifiers result in a + printed resource with ('R') or without ('r') a decoded flags member. + +MAC/FDDI addresses: + + %pM 00:01:02:03:04:05 + %pMF 00-01-02-03-04-05 + %pm 000102030405 + + For printing 6-byte MAC/FDDI addresses in hex notation. The 'M' and 'm' + specifiers result in a printed address with ('M') or without ('m') byte + separators. The default byte separator is the colon (':'). + + Where FDDI addresses are concerned the 'F' specifier can be used after + the 'M' specifier to use dash ('-') separators instead of the default + separator. + +IPv4 addresses: + + %pI4 1.2.3.4 + %pi4 001.002.003.004 + %p[Ii][hnbl] + + For printing IPv4 dot-separated decimal addresses. The 'I4' and 'i4' + specifiers result in a printed address with ('i4') or without ('I4') + leading zeros. + + The additional 'h', 'n', 'b', and 'l' specifiers are used to specify + host, network, big or little endian order addresses respectively. Where + no specifier is provided the default network/big endian order is used. + +IPv6 addresses: + + %pI6 0001:0002:0003:0004:0005:0006:0007:0008 + %pi6 00010002000300040005000600070008 + %pI6c 1:2:3:4:5:6:7:8 + + For printing IPv6 network-order 16-bit hex addresses. The 'I6' and 'i6' + specifiers result in a printed address with ('I6') or without ('i6') + colon-separators. Leading zeros are always used. + + The additional 'c' specifier can be used with the 'I' specifier to + print a compressed IPv6 address as described by + http://tools.ietf.org/html/rfc5952 + +UUID/GUID addresses: + + %pUb 00010203-0405-0607-0809-0a0b0c0d0e0f + %pUB 00010203-0405-0607-0809-0A0B0C0D0E0F + %pUl 03020100-0504-0706-0809-0a0b0c0e0e0f + %pUL 03020100-0504-0706-0809-0A0B0C0E0E0F + + For printing 16-byte UUID/GUIDs addresses. The additional 'l', 'L', + 'b' and 'B' specifiers are used to specify a little endian order in + lower ('l') or upper case ('L') hex characters - and big endian order + in lower ('b') or upper case ('B') hex characters. + + Where no additional specifiers are used the default little endian + order with lower case hex characters will be printed. + +struct va_format: + + %pV + + For printing struct va_format structures. These contain a format string + and va_list as follows: + + struct va_format { + const char *fmt; + va_list *va; + }; + + Do not use this feature without some mechanism to verify the + correctness of the format string and va_list arguments. u64 SHOULD be printed with %llu/%llx, (unsigned long long): @@ -32,4 +146,5 @@ Reminder: sizeof() result is of type size_t. Thank you for your cooperation and attention. -By Randy Dunlap +By Randy Dunlap and +Andrew Murray diff --git a/Documentation/pti/pti_intel_mid.txt b/Documentation/pti/pti_intel_mid.txt new file mode 100644 index 000000000000..e7a5b6d1f7a9 --- /dev/null +++ b/Documentation/pti/pti_intel_mid.txt @@ -0,0 +1,99 @@ +The Intel MID PTI project is HW implemented in Intel Atom +system-on-a-chip designs based on the Parallel Trace +Interface for MIPI P1149.7 cJTAG standard. The kernel solution +for this platform involves the following files: + +./include/linux/pti.h +./drivers/.../n_tracesink.h +./drivers/.../n_tracerouter.c +./drivers/.../n_tracesink.c +./drivers/.../pti.c + +pti.c is the driver that enables various debugging features +popular on platforms from certain mobile manufacturers. +n_tracerouter.c and n_tracesink.c allow extra system information to +be collected and routed to the pti driver, such as trace +debugging data from a modem. Although n_tracerouter +and n_tracesink are a part of the complete PTI solution, +these two line disciplines can work separately from +pti.c and route any data stream from one /dev/tty node +to another /dev/tty node via kernel-space. This provides +a stable, reliable connection that will not break unless +the user-space application shuts down (plus avoids +kernel->user->kernel context switch overheads of routing +data). + +An example debugging usage for this driver system: + *Hook /dev/ttyPTI0 to syslogd. Opening this port will also start + a console device to further capture debugging messages to PTI. + *Hook /dev/ttyPTI1 to modem debugging data to write to PTI HW. + This is where n_tracerouter and n_tracesink are used. + *Hook /dev/pti to a user-level debugging application for writing + to PTI HW. + *Use mipi_* Kernel Driver API in other device drivers for + debugging to PTI by first requesting a PTI write address via + mipi_request_masterchannel(1). + +Below is example pseudo-code on how a 'privileged' application +can hook up n_tracerouter and n_tracesink to any tty on +a system. 'Privileged' means the application has enough +privileges to successfully manipulate the ldisc drivers +but is not just blindly executing as 'root'. Keep in mind +the use of ioctl(,TIOCSETD,) is not specific to the n_tracerouter +and n_tracesink line discpline drivers but is a generic +operation for a program to use a line discpline driver +on a tty port other than the default n_tty. + +/////////// To hook up n_tracerouter and n_tracesink ///////// + +// Note that n_tracerouter depends on n_tracesink. +#include +#define ONE_TTY "/dev/ttyOne" +#define TWO_TTY "/dev/ttyTwo" + +// needed global to hand onto ldisc connection +static int g_fd_source = -1; +static int g_fd_sink = -1; + +// these two vars used to grab LDISC values from loaded ldisc drivers +// in OS. Look at /proc/tty/ldiscs to get the right numbers from +// the ldiscs loaded in the system. +int source_ldisc_num, sink_ldisc_num = -1; +int retval; + +g_fd_source = open(ONE_TTY, O_RDWR); // must be R/W +g_fd_sink = open(TWO_TTY, O_RDWR); // must be R/W + +if (g_fd_source <= 0) || (g_fd_sink <= 0) { + // doubt you'll want to use these exact error lines of code + printf("Error on open(). errno: %d\n",errno); + return errno; +} + +retval = ioctl(g_fd_sink, TIOCSETD, &sink_ldisc_num); +if (retval < 0) { + printf("Error on ioctl(). errno: %d\n", errno); + return errno; +} + +retval = ioctl(g_fd_source, TIOCSETD, &source_ldisc_num); +if (retval < 0) { + printf("Error on ioctl(). errno: %d\n", errno); + return errno; +} + +/////////// To disconnect n_tracerouter and n_tracesink //////// + +// First make sure data through the ldiscs has stopped. + +// Second, disconnect ldiscs. This provides a +// little cleaner shutdown on tty stack. +sink_ldisc_num = 0; +source_ldisc_num = 0; +ioctl(g_fd_uart, TIOCSETD, &sink_ldisc_num); +ioctl(g_fd_gadget, TIOCSETD, &source_ldisc_num); + +// Three, program closes connection, and cleanup: +close(g_fd_uart); +close(g_fd_gadget); +g_fd_uart = g_fd_gadget = NULL; diff --git a/Documentation/ptp/ptp.txt b/Documentation/ptp/ptp.txt new file mode 100644 index 000000000000..ae8fef86b832 --- /dev/null +++ b/Documentation/ptp/ptp.txt @@ -0,0 +1,89 @@ + +* PTP hardware clock infrastructure for Linux + + This patch set introduces support for IEEE 1588 PTP clocks in + Linux. Together with the SO_TIMESTAMPING socket options, this + presents a standardized method for developing PTP user space + programs, synchronizing Linux with external clocks, and using the + ancillary features of PTP hardware clocks. + + A new class driver exports a kernel interface for specific clock + drivers and a user space interface. The infrastructure supports a + complete set of PTP hardware clock functionality. + + + Basic clock operations + - Set time + - Get time + - Shift the clock by a given offset atomically + - Adjust clock frequency + + + Ancillary clock features + - One short or periodic alarms, with signal delivery to user program + - Time stamp external events + - Period output signals configurable from user space + - Synchronization of the Linux system time via the PPS subsystem + +** PTP hardware clock kernel API + + A PTP clock driver registers itself with the class driver. The + class driver handles all of the dealings with user space. The + author of a clock driver need only implement the details of + programming the clock hardware. The clock driver notifies the class + driver of asynchronous events (alarms and external time stamps) via + a simple message passing interface. + + The class driver supports multiple PTP clock drivers. In normal use + cases, only one PTP clock is needed. However, for testing and + development, it can be useful to have more than one clock in a + single system, in order to allow performance comparisons. + +** PTP hardware clock user space API + + The class driver also creates a character device for each + registered clock. User space can use an open file descriptor from + the character device as a POSIX clock id and may call + clock_gettime, clock_settime, and clock_adjtime. These calls + implement the basic clock operations. + + User space programs may control the clock using standardized + ioctls. A program may query, enable, configure, and disable the + ancillary clock features. User space can receive time stamped + events via blocking read() and poll(). One shot and periodic + signals may be configured via the POSIX timer_settime() system + call. + +** Writing clock drivers + + Clock drivers include include/linux/ptp_clock_kernel.h and register + themselves by presenting a 'struct ptp_clock_info' to the + registration method. Clock drivers must implement all of the + functions in the interface. If a clock does not offer a particular + ancillary feature, then the driver should just return -EOPNOTSUPP + from those functions. + + Drivers must ensure that all of the methods in interface are + reentrant. Since most hardware implementations treat the time value + as a 64 bit integer accessed as two 32 bit registers, drivers + should use spin_lock_irqsave/spin_unlock_irqrestore to protect + against concurrent access. This locking cannot be accomplished in + class driver, since the lock may also be needed by the clock + driver's interrupt service routine. + +** Supported hardware + + + Freescale eTSEC gianfar + - 2 Time stamp external triggers, programmable polarity (opt. interrupt) + - 2 Alarm registers (optional interrupt) + - 3 Periodic signals (optional interrupt) + + + National DP83640 + - 6 GPIOs programmable as inputs or outputs + - 6 GPIOs with dedicated functions (LED/JTAG/clock) can also be + used as general inputs or outputs + - GPIO inputs can time stamp external triggers + - GPIO outputs can produce periodic signals + - 1 interrupt pin + + + Intel IXP465 + - Auxiliary Slave/Master Mode Snapshot (optional interrupt) + - Target Time (optional interrupt) diff --git a/Documentation/ptp/testptp.c b/Documentation/ptp/testptp.c new file mode 100644 index 000000000000..f59ded066108 --- /dev/null +++ b/Documentation/ptp/testptp.c @@ -0,0 +1,381 @@ +/* + * PTP 1588 clock support - User space test program + * + * Copyright (C) 2010 OMICRON electronics GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define DEVICE "/dev/ptp0" + +#ifndef ADJ_SETOFFSET +#define ADJ_SETOFFSET 0x0100 +#endif + +#ifndef CLOCK_INVALID +#define CLOCK_INVALID -1 +#endif + +/* When glibc offers the syscall, this will go away. */ +#include +static int clock_adjtime(clockid_t id, struct timex *tx) +{ + return syscall(__NR_clock_adjtime, id, tx); +} + +static clockid_t get_clockid(int fd) +{ +#define CLOCKFD 3 +#define FD_TO_CLOCKID(fd) ((~(clockid_t) (fd) << 3) | CLOCKFD) + + return FD_TO_CLOCKID(fd); +} + +static void handle_alarm(int s) +{ + printf("received signal %d\n", s); +} + +static int install_handler(int signum, void (*handler)(int)) +{ + struct sigaction action; + sigset_t mask; + + /* Unblock the signal. */ + sigemptyset(&mask); + sigaddset(&mask, signum); + sigprocmask(SIG_UNBLOCK, &mask, NULL); + + /* Install the signal handler. */ + action.sa_handler = handler; + action.sa_flags = 0; + sigemptyset(&action.sa_mask); + sigaction(signum, &action, NULL); + + return 0; +} + +static long ppb_to_scaled_ppm(int ppb) +{ + /* + * The 'freq' field in the 'struct timex' is in parts per + * million, but with a 16 bit binary fractional field. + * Instead of calculating either one of + * + * scaled_ppm = (ppb / 1000) << 16 [1] + * scaled_ppm = (ppb << 16) / 1000 [2] + * + * we simply use double precision math, in order to avoid the + * truncation in [1] and the possible overflow in [2]. + */ + return (long) (ppb * 65.536); +} + +static void usage(char *progname) +{ + fprintf(stderr, + "usage: %s [options]\n" + " -a val request a one-shot alarm after 'val' seconds\n" + " -A val request a periodic alarm every 'val' seconds\n" + " -c query the ptp clock's capabilities\n" + " -d name device to open\n" + " -e val read 'val' external time stamp events\n" + " -f val adjust the ptp clock frequency by 'val' ppb\n" + " -g get the ptp clock time\n" + " -h prints this message\n" + " -p val enable output with a period of 'val' nanoseconds\n" + " -P val enable or disable (val=1|0) the system clock PPS\n" + " -s set the ptp clock time from the system time\n" + " -S set the system time from the ptp clock time\n" + " -t val shift the ptp clock time by 'val' seconds\n", + progname); +} + +int main(int argc, char *argv[]) +{ + struct ptp_clock_caps caps; + struct ptp_extts_event event; + struct ptp_extts_request extts_request; + struct ptp_perout_request perout_request; + struct timespec ts; + struct timex tx; + + static timer_t timerid; + struct itimerspec timeout; + struct sigevent sigevent; + + char *progname; + int c, cnt, fd; + + char *device = DEVICE; + clockid_t clkid; + int adjfreq = 0x7fffffff; + int adjtime = 0; + int capabilities = 0; + int extts = 0; + int gettime = 0; + int oneshot = 0; + int periodic = 0; + int perout = -1; + int pps = -1; + int settime = 0; + + progname = strrchr(argv[0], '/'); + progname = progname ? 1+progname : argv[0]; + while (EOF != (c = getopt(argc, argv, "a:A:cd:e:f:ghp:P:sSt:v"))) { + switch (c) { + case 'a': + oneshot = atoi(optarg); + break; + case 'A': + periodic = atoi(optarg); + break; + case 'c': + capabilities = 1; + break; + case 'd': + device = optarg; + break; + case 'e': + extts = atoi(optarg); + break; + case 'f': + adjfreq = atoi(optarg); + break; + case 'g': + gettime = 1; + break; + case 'p': + perout = atoi(optarg); + break; + case 'P': + pps = atoi(optarg); + break; + case 's': + settime = 1; + break; + case 'S': + settime = 2; + break; + case 't': + adjtime = atoi(optarg); + break; + case 'h': + usage(progname); + return 0; + case '?': + default: + usage(progname); + return -1; + } + } + + fd = open(device, O_RDWR); + if (fd < 0) { + fprintf(stderr, "opening %s: %s\n", device, strerror(errno)); + return -1; + } + + clkid = get_clockid(fd); + if (CLOCK_INVALID == clkid) { + fprintf(stderr, "failed to read clock id\n"); + return -1; + } + + if (capabilities) { + if (ioctl(fd, PTP_CLOCK_GETCAPS, &caps)) { + perror("PTP_CLOCK_GETCAPS"); + } else { + printf("capabilities:\n" + " %d maximum frequency adjustment (ppb)\n" + " %d programmable alarms\n" + " %d external time stamp channels\n" + " %d programmable periodic signals\n" + " %d pulse per second\n", + caps.max_adj, + caps.n_alarm, + caps.n_ext_ts, + caps.n_per_out, + caps.pps); + } + } + + if (0x7fffffff != adjfreq) { + memset(&tx, 0, sizeof(tx)); + tx.modes = ADJ_FREQUENCY; + tx.freq = ppb_to_scaled_ppm(adjfreq); + if (clock_adjtime(clkid, &tx)) { + perror("clock_adjtime"); + } else { + puts("frequency adjustment okay"); + } + } + + if (adjtime) { + memset(&tx, 0, sizeof(tx)); + tx.modes = ADJ_SETOFFSET; + tx.time.tv_sec = adjtime; + tx.time.tv_usec = 0; + if (clock_adjtime(clkid, &tx) < 0) { + perror("clock_adjtime"); + } else { + puts("time shift okay"); + } + } + + if (gettime) { + if (clock_gettime(clkid, &ts)) { + perror("clock_gettime"); + } else { + printf("clock time: %ld.%09ld or %s", + ts.tv_sec, ts.tv_nsec, ctime(&ts.tv_sec)); + } + } + + if (settime == 1) { + clock_gettime(CLOCK_REALTIME, &ts); + if (clock_settime(clkid, &ts)) { + perror("clock_settime"); + } else { + puts("set time okay"); + } + } + + if (settime == 2) { + clock_gettime(clkid, &ts); + if (clock_settime(CLOCK_REALTIME, &ts)) { + perror("clock_settime"); + } else { + puts("set time okay"); + } + } + + if (extts) { + memset(&extts_request, 0, sizeof(extts_request)); + extts_request.index = 0; + extts_request.flags = PTP_ENABLE_FEATURE; + if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) { + perror("PTP_EXTTS_REQUEST"); + extts = 0; + } else { + puts("external time stamp request okay"); + } + for (; extts; extts--) { + cnt = read(fd, &event, sizeof(event)); + if (cnt != sizeof(event)) { + perror("read"); + break; + } + printf("event index %u at %lld.%09u\n", event.index, + event.t.sec, event.t.nsec); + fflush(stdout); + } + /* Disable the feature again. */ + extts_request.flags = 0; + if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) { + perror("PTP_EXTTS_REQUEST"); + } + } + + if (oneshot) { + install_handler(SIGALRM, handle_alarm); + /* Create a timer. */ + sigevent.sigev_notify = SIGEV_SIGNAL; + sigevent.sigev_signo = SIGALRM; + if (timer_create(clkid, &sigevent, &timerid)) { + perror("timer_create"); + return -1; + } + /* Start the timer. */ + memset(&timeout, 0, sizeof(timeout)); + timeout.it_value.tv_sec = oneshot; + if (timer_settime(timerid, 0, &timeout, NULL)) { + perror("timer_settime"); + return -1; + } + pause(); + timer_delete(timerid); + } + + if (periodic) { + install_handler(SIGALRM, handle_alarm); + /* Create a timer. */ + sigevent.sigev_notify = SIGEV_SIGNAL; + sigevent.sigev_signo = SIGALRM; + if (timer_create(clkid, &sigevent, &timerid)) { + perror("timer_create"); + return -1; + } + /* Start the timer. */ + memset(&timeout, 0, sizeof(timeout)); + timeout.it_interval.tv_sec = periodic; + timeout.it_value.tv_sec = periodic; + if (timer_settime(timerid, 0, &timeout, NULL)) { + perror("timer_settime"); + return -1; + } + while (1) { + pause(); + } + timer_delete(timerid); + } + + if (perout >= 0) { + if (clock_gettime(clkid, &ts)) { + perror("clock_gettime"); + return -1; + } + memset(&perout_request, 0, sizeof(perout_request)); + perout_request.index = 0; + perout_request.start.sec = ts.tv_sec + 2; + perout_request.start.nsec = 0; + perout_request.period.sec = 0; + perout_request.period.nsec = perout; + if (ioctl(fd, PTP_PEROUT_REQUEST, &perout_request)) { + perror("PTP_PEROUT_REQUEST"); + } else { + puts("periodic output request okay"); + } + } + + if (pps != -1) { + int enable = pps ? 1 : 0; + if (ioctl(fd, PTP_ENABLE_PPS, enable)) { + perror("PTP_ENABLE_PPS"); + } else { + puts("pps for system time request okay"); + } + } + + close(fd); + return 0; +} diff --git a/Documentation/ptp/testptp.mk b/Documentation/ptp/testptp.mk new file mode 100644 index 000000000000..4ef2d9755421 --- /dev/null +++ b/Documentation/ptp/testptp.mk @@ -0,0 +1,33 @@ +# PTP 1588 clock support - User space test program +# +# Copyright (C) 2010 OMICRON electronics GmbH +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +CC = $(CROSS_COMPILE)gcc +INC = -I$(KBUILD_OUTPUT)/usr/include +CFLAGS = -Wall $(INC) +LDLIBS = -lrt +PROGS = testptp + +all: $(PROGS) + +testptp: testptp.o + +clean: + rm -f testptp.o + +distclean: clean + rm -f $(PROGS) diff --git a/Documentation/scheduler/sched-design-CFS.txt b/Documentation/scheduler/sched-design-CFS.txt index 99961993257a..91ecff07cede 100644 --- a/Documentation/scheduler/sched-design-CFS.txt +++ b/Documentation/scheduler/sched-design-CFS.txt @@ -223,9 +223,10 @@ When CONFIG_FAIR_GROUP_SCHED is defined, a "cpu.shares" file is created for each group created using the pseudo filesystem. See example steps below to create task groups and modify their CPU share using the "cgroups" pseudo filesystem. - # mkdir /dev/cpuctl - # mount -t cgroup -ocpu none /dev/cpuctl - # cd /dev/cpuctl + # mount -t tmpfs cgroup_root /sys/fs/cgroup + # mkdir /sys/fs/cgroup/cpu + # mount -t cgroup -ocpu none /sys/fs/cgroup/cpu + # cd /sys/fs/cgroup/cpu # mkdir multimedia # create "multimedia" group of tasks # mkdir browser # create "browser" group of tasks diff --git a/Documentation/scheduler/sched-rt-group.txt b/Documentation/scheduler/sched-rt-group.txt index 605b0d40329d..71b54d549987 100644 --- a/Documentation/scheduler/sched-rt-group.txt +++ b/Documentation/scheduler/sched-rt-group.txt @@ -129,9 +129,8 @@ priority! Enabling CONFIG_RT_GROUP_SCHED lets you explicitly allocate real CPU bandwidth to task groups. -This uses the /cgroup virtual file system and -"/cgroup//cpu.rt_runtime_us" to control the CPU time reserved for each -control group. +This uses the cgroup virtual file system and "/cpu.rt_runtime_us" +to control the CPU time reserved for each control group. For more information on working with control groups, you should read Documentation/cgroups/cgroups.txt as well. @@ -150,7 +149,7 @@ For now, this can be simplified to just the following (but see Future plans): =============== There is work in progress to make the scheduling period for each group -("/cgroup//cpu.rt_period_us") configurable as well. +("/cpu.rt_period_us") configurable as well. The constraint on the period is that a subgroup must have a smaller or equal period to its parent. But realistically its not very useful _yet_ diff --git a/Documentation/scsi/ChangeLog.megaraid_sas b/Documentation/scsi/ChangeLog.megaraid_sas index 4d9ce73ff730..9ed1d9d96783 100644 --- a/Documentation/scsi/ChangeLog.megaraid_sas +++ b/Documentation/scsi/ChangeLog.megaraid_sas @@ -1,3 +1,17 @@ +Release Date : Wed. May 11, 2011 17:00:00 PST 2010 - + (emaild-id:megaraidlinux@lsi.com) + Adam Radford +Current Version : 00.00.05.38-rc1 +Old Version : 00.00.05.34-rc1 + 1. Remove MSI-X black list, use MFI_REG_STATE.ready.msiEnable. + 2. Remove un-used function megasas_return_cmd_for_smid(). + 3. Check MFI_REG_STATE.fault.resetAdapter in megasas_reset_fusion(). + 4. Disable interrupts/free_irq() in megasas_shutdown(). + 5. Fix bug where AENs could be lost in probe() and resume(). + 6. Convert 6,10,12 byte CDB's to 16 byte CDB for large LBA's for FastPath + IO. + 7. Add 1078 OCR support. +------------------------------------------------------------------------------- Release Date : Thu. Feb 24, 2011 17:00:00 PST 2010 - (emaild-id:megaraidlinux@lsi.com) Adam Radford diff --git a/Documentation/scsi/LICENSE.qla2xxx b/Documentation/scsi/LICENSE.qla2xxx index 9e15b4f9cd28..19e7cd4bba66 100644 --- a/Documentation/scsi/LICENSE.qla2xxx +++ b/Documentation/scsi/LICENSE.qla2xxx @@ -1,11 +1,11 @@ -Copyright (c) 2003-2005 QLogic Corporation -QLogic Linux Fibre Channel HBA Driver +Copyright (c) 2003-2011 QLogic Corporation +QLogic Linux/ESX Fibre Channel HBA Driver -This program includes a device driver for Linux 2.6 that may be +This program includes a device driver for Linux 2.6/ESX that may be distributed with QLogic hardware specific firmware binary file. You may modify and redistribute the device driver code under the -GNU General Public License as published by the Free Software -Foundation (version 2 or a later version). +GNU General Public License (a copy of which is attached hereto as +Exhibit A) published by the Free Software Foundation (version 2). You may redistribute the hardware specific firmware binary file under the following terms: @@ -43,3 +43,285 @@ OTHERWISE IN ANY INTELLECTUAL PROPERTY RIGHTS (PATENT, COPYRIGHT, TRADE SECRET, MASK WORK, OR OTHER PROPRIETARY RIGHT) EMBODIED IN ANY OTHER QLOGIC HARDWARE OR SOFTWARE EITHER SOLELY OR IN COMBINATION WITH THIS PROGRAM. + + +EXHIBIT A + + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. diff --git a/Documentation/security/00-INDEX b/Documentation/security/00-INDEX new file mode 100644 index 000000000000..19bc49439cac --- /dev/null +++ b/Documentation/security/00-INDEX @@ -0,0 +1,18 @@ +00-INDEX + - this file. +SELinux.txt + - how to get started with the SELinux security enhancement. +Smack.txt + - documentation on the Smack Linux Security Module. +apparmor.txt + - documentation on the AppArmor security extension. +credentials.txt + - documentation about credentials in Linux. +keys-request-key.txt + - description of the kernel key request service. +keys-trusted-encrypted.txt + - info on the Trusted and Encrypted keys in the kernel key ring service. +keys.txt + - description of the kernel key retention service. +tomoyo.txt + - documentation on the TOMOYO Linux Security Module. diff --git a/Documentation/SELinux.txt b/Documentation/security/SELinux.txt similarity index 100% rename from Documentation/SELinux.txt rename to Documentation/security/SELinux.txt diff --git a/Documentation/Smack.txt b/Documentation/security/Smack.txt similarity index 100% rename from Documentation/Smack.txt rename to Documentation/security/Smack.txt diff --git a/Documentation/apparmor.txt b/Documentation/security/apparmor.txt similarity index 100% rename from Documentation/apparmor.txt rename to Documentation/security/apparmor.txt diff --git a/Documentation/security/credentials.txt b/Documentation/security/credentials.txt new file mode 100644 index 000000000000..fc0366cbd7ce --- /dev/null +++ b/Documentation/security/credentials.txt @@ -0,0 +1,581 @@ + ==================== + CREDENTIALS IN LINUX + ==================== + +By: David Howells + +Contents: + + (*) Overview. + + (*) Types of credentials. + + (*) File markings. + + (*) Task credentials. + + - Immutable credentials. + - Accessing task credentials. + - Accessing another task's credentials. + - Altering credentials. + - Managing credentials. + + (*) Open file credentials. + + (*) Overriding the VFS's use of credentials. + + +======== +OVERVIEW +======== + +There are several parts to the security check performed by Linux when one +object acts upon another: + + (1) Objects. + + Objects are things in the system that may be acted upon directly by + userspace programs. Linux has a variety of actionable objects, including: + + - Tasks + - Files/inodes + - Sockets + - Message queues + - Shared memory segments + - Semaphores + - Keys + + As a part of the description of all these objects there is a set of + credentials. What's in the set depends on the type of object. + + (2) Object ownership. + + Amongst the credentials of most objects, there will be a subset that + indicates the ownership of that object. This is used for resource + accounting and limitation (disk quotas and task rlimits for example). + + In a standard UNIX filesystem, for instance, this will be defined by the + UID marked on the inode. + + (3) The objective context. + + Also amongst the credentials of those objects, there will be a subset that + indicates the 'objective context' of that object. This may or may not be + the same set as in (2) - in standard UNIX files, for instance, this is the + defined by the UID and the GID marked on the inode. + + The objective context is used as part of the security calculation that is + carried out when an object is acted upon. + + (4) Subjects. + + A subject is an object that is acting upon another object. + + Most of the objects in the system are inactive: they don't act on other + objects within the system. Processes/tasks are the obvious exception: + they do stuff; they access and manipulate things. + + Objects other than tasks may under some circumstances also be subjects. + For instance an open file may send SIGIO to a task using the UID and EUID + given to it by a task that called fcntl(F_SETOWN) upon it. In this case, + the file struct will have a subjective context too. + + (5) The subjective context. + + A subject has an additional interpretation of its credentials. A subset + of its credentials forms the 'subjective context'. The subjective context + is used as part of the security calculation that is carried out when a + subject acts. + + A Linux task, for example, has the FSUID, FSGID and the supplementary + group list for when it is acting upon a file - which are quite separate + from the real UID and GID that normally form the objective context of the + task. + + (6) Actions. + + Linux has a number of actions available that a subject may perform upon an + object. The set of actions available depends on the nature of the subject + and the object. + + Actions include reading, writing, creating and deleting files; forking or + signalling and tracing tasks. + + (7) Rules, access control lists and security calculations. + + When a subject acts upon an object, a security calculation is made. This + involves taking the subjective context, the objective context and the + action, and searching one or more sets of rules to see whether the subject + is granted or denied permission to act in the desired manner on the + object, given those contexts. + + There are two main sources of rules: + + (a) Discretionary access control (DAC): + + Sometimes the object will include sets of rules as part of its + description. This is an 'Access Control List' or 'ACL'. A Linux + file may supply more than one ACL. + + A traditional UNIX file, for example, includes a permissions mask that + is an abbreviated ACL with three fixed classes of subject ('user', + 'group' and 'other'), each of which may be granted certain privileges + ('read', 'write' and 'execute' - whatever those map to for the object + in question). UNIX file permissions do not allow the arbitrary + specification of subjects, however, and so are of limited use. + + A Linux file might also sport a POSIX ACL. This is a list of rules + that grants various permissions to arbitrary subjects. + + (b) Mandatory access control (MAC): + + The system as a whole may have one or more sets of rules that get + applied to all subjects and objects, regardless of their source. + SELinux and Smack are examples of this. + + In the case of SELinux and Smack, each object is given a label as part + of its credentials. When an action is requested, they take the + subject label, the object label and the action and look for a rule + that says that this action is either granted or denied. + + +==================== +TYPES OF CREDENTIALS +==================== + +The Linux kernel supports the following types of credentials: + + (1) Traditional UNIX credentials. + + Real User ID + Real Group ID + + The UID and GID are carried by most, if not all, Linux objects, even if in + some cases it has to be invented (FAT or CIFS files for example, which are + derived from Windows). These (mostly) define the objective context of + that object, with tasks being slightly different in some cases. + + Effective, Saved and FS User ID + Effective, Saved and FS Group ID + Supplementary groups + + These are additional credentials used by tasks only. Usually, an + EUID/EGID/GROUPS will be used as the subjective context, and real UID/GID + will be used as the objective. For tasks, it should be noted that this is + not always true. + + (2) Capabilities. + + Set of permitted capabilities + Set of inheritable capabilities + Set of effective capabilities + Capability bounding set + + These are only carried by tasks. They indicate superior capabilities + granted piecemeal to a task that an ordinary task wouldn't otherwise have. + These are manipulated implicitly by changes to the traditional UNIX + credentials, but can also be manipulated directly by the capset() system + call. + + The permitted capabilities are those caps that the process might grant + itself to its effective or permitted sets through capset(). This + inheritable set might also be so constrained. + + The effective capabilities are the ones that a task is actually allowed to + make use of itself. + + The inheritable capabilities are the ones that may get passed across + execve(). + + The bounding set limits the capabilities that may be inherited across + execve(), especially when a binary is executed that will execute as UID 0. + + (3) Secure management flags (securebits). + + These are only carried by tasks. These govern the way the above + credentials are manipulated and inherited over certain operations such as + execve(). They aren't used directly as objective or subjective + credentials. + + (4) Keys and keyrings. + + These are only carried by tasks. They carry and cache security tokens + that don't fit into the other standard UNIX credentials. They are for + making such things as network filesystem keys available to the file + accesses performed by processes, without the necessity of ordinary + programs having to know about security details involved. + + Keyrings are a special type of key. They carry sets of other keys and can + be searched for the desired key. Each process may subscribe to a number + of keyrings: + + Per-thread keying + Per-process keyring + Per-session keyring + + When a process accesses a key, if not already present, it will normally be + cached on one of these keyrings for future accesses to find. + + For more information on using keys, see Documentation/security/keys.txt. + + (5) LSM + + The Linux Security Module allows extra controls to be placed over the + operations that a task may do. Currently Linux supports two main + alternate LSM options: SELinux and Smack. + + Both work by labelling the objects in a system and then applying sets of + rules (policies) that say what operations a task with one label may do to + an object with another label. + + (6) AF_KEY + + This is a socket-based approach to credential management for networking + stacks [RFC 2367]. It isn't discussed by this document as it doesn't + interact directly with task and file credentials; rather it keeps system + level credentials. + + +When a file is opened, part of the opening task's subjective context is +recorded in the file struct created. This allows operations using that file +struct to use those credentials instead of the subjective context of the task +that issued the operation. An example of this would be a file opened on a +network filesystem where the credentials of the opened file should be presented +to the server, regardless of who is actually doing a read or a write upon it. + + +============= +FILE MARKINGS +============= + +Files on disk or obtained over the network may have annotations that form the +objective security context of that file. Depending on the type of filesystem, +this may include one or more of the following: + + (*) UNIX UID, GID, mode; + + (*) Windows user ID; + + (*) Access control list; + + (*) LSM security label; + + (*) UNIX exec privilege escalation bits (SUID/SGID); + + (*) File capabilities exec privilege escalation bits. + +These are compared to the task's subjective security context, and certain +operations allowed or disallowed as a result. In the case of execve(), the +privilege escalation bits come into play, and may allow the resulting process +extra privileges, based on the annotations on the executable file. + + +================ +TASK CREDENTIALS +================ + +In Linux, all of a task's credentials are held in (uid, gid) or through +(groups, keys, LSM security) a refcounted structure of type 'struct cred'. +Each task points to its credentials by a pointer called 'cred' in its +task_struct. + +Once a set of credentials has been prepared and committed, it may not be +changed, barring the following exceptions: + + (1) its reference count may be changed; + + (2) the reference count on the group_info struct it points to may be changed; + + (3) the reference count on the security data it points to may be changed; + + (4) the reference count on any keyrings it points to may be changed; + + (5) any keyrings it points to may be revoked, expired or have their security + attributes changed; and + + (6) the contents of any keyrings to which it points may be changed (the whole + point of keyrings being a shared set of credentials, modifiable by anyone + with appropriate access). + +To alter anything in the cred struct, the copy-and-replace principle must be +adhered to. First take a copy, then alter the copy and then use RCU to change +the task pointer to make it point to the new copy. There are wrappers to aid +with this (see below). + +A task may only alter its _own_ credentials; it is no longer permitted for a +task to alter another's credentials. This means the capset() system call is no +longer permitted to take any PID other than the one of the current process. +Also keyctl_instantiate() and keyctl_negate() functions no longer permit +attachment to process-specific keyrings in the requesting process as the +instantiating process may need to create them. + + +IMMUTABLE CREDENTIALS +--------------------- + +Once a set of credentials has been made public (by calling commit_creds() for +example), it must be considered immutable, barring two exceptions: + + (1) The reference count may be altered. + + (2) Whilst the keyring subscriptions of a set of credentials may not be + changed, the keyrings subscribed to may have their contents altered. + +To catch accidental credential alteration at compile time, struct task_struct +has _const_ pointers to its credential sets, as does struct file. Furthermore, +certain functions such as get_cred() and put_cred() operate on const pointers, +thus rendering casts unnecessary, but require to temporarily ditch the const +qualification to be able to alter the reference count. + + +ACCESSING TASK CREDENTIALS +-------------------------- + +A task being able to alter only its own credentials permits the current process +to read or replace its own credentials without the need for any form of locking +- which simplifies things greatly. It can just call: + + const struct cred *current_cred() + +to get a pointer to its credentials structure, and it doesn't have to release +it afterwards. + +There are convenience wrappers for retrieving specific aspects of a task's +credentials (the value is simply returned in each case): + + uid_t current_uid(void) Current's real UID + gid_t current_gid(void) Current's real GID + uid_t current_euid(void) Current's effective UID + gid_t current_egid(void) Current's effective GID + uid_t current_fsuid(void) Current's file access UID + gid_t current_fsgid(void) Current's file access GID + kernel_cap_t current_cap(void) Current's effective capabilities + void *current_security(void) Current's LSM security pointer + struct user_struct *current_user(void) Current's user account + +There are also convenience wrappers for retrieving specific associated pairs of +a task's credentials: + + void current_uid_gid(uid_t *, gid_t *); + void current_euid_egid(uid_t *, gid_t *); + void current_fsuid_fsgid(uid_t *, gid_t *); + +which return these pairs of values through their arguments after retrieving +them from the current task's credentials. + + +In addition, there is a function for obtaining a reference on the current +process's current set of credentials: + + const struct cred *get_current_cred(void); + +and functions for getting references to one of the credentials that don't +actually live in struct cred: + + struct user_struct *get_current_user(void); + struct group_info *get_current_groups(void); + +which get references to the current process's user accounting structure and +supplementary groups list respectively. + +Once a reference has been obtained, it must be released with put_cred(), +free_uid() or put_group_info() as appropriate. + + +ACCESSING ANOTHER TASK'S CREDENTIALS +------------------------------------ + +Whilst a task may access its own credentials without the need for locking, the +same is not true of a task wanting to access another task's credentials. It +must use the RCU read lock and rcu_dereference(). + +The rcu_dereference() is wrapped by: + + const struct cred *__task_cred(struct task_struct *task); + +This should be used inside the RCU read lock, as in the following example: + + void foo(struct task_struct *t, struct foo_data *f) + { + const struct cred *tcred; + ... + rcu_read_lock(); + tcred = __task_cred(t); + f->uid = tcred->uid; + f->gid = tcred->gid; + f->groups = get_group_info(tcred->groups); + rcu_read_unlock(); + ... + } + +Should it be necessary to hold another task's credentials for a long period of +time, and possibly to sleep whilst doing so, then the caller should get a +reference on them using: + + const struct cred *get_task_cred(struct task_struct *task); + +This does all the RCU magic inside of it. The caller must call put_cred() on +the credentials so obtained when they're finished with. + + [*] Note: The result of __task_cred() should not be passed directly to + get_cred() as this may race with commit_cred(). + +There are a couple of convenience functions to access bits of another task's +credentials, hiding the RCU magic from the caller: + + uid_t task_uid(task) Task's real UID + uid_t task_euid(task) Task's effective UID + +If the caller is holding the RCU read lock at the time anyway, then: + + __task_cred(task)->uid + __task_cred(task)->euid + +should be used instead. Similarly, if multiple aspects of a task's credentials +need to be accessed, RCU read lock should be used, __task_cred() called, the +result stored in a temporary pointer and then the credential aspects called +from that before dropping the lock. This prevents the potentially expensive +RCU magic from being invoked multiple times. + +Should some other single aspect of another task's credentials need to be +accessed, then this can be used: + + task_cred_xxx(task, member) + +where 'member' is a non-pointer member of the cred struct. For instance: + + uid_t task_cred_xxx(task, suid); + +will retrieve 'struct cred::suid' from the task, doing the appropriate RCU +magic. This may not be used for pointer members as what they point to may +disappear the moment the RCU read lock is dropped. + + +ALTERING CREDENTIALS +-------------------- + +As previously mentioned, a task may only alter its own credentials, and may not +alter those of another task. This means that it doesn't need to use any +locking to alter its own credentials. + +To alter the current process's credentials, a function should first prepare a +new set of credentials by calling: + + struct cred *prepare_creds(void); + +this locks current->cred_replace_mutex and then allocates and constructs a +duplicate of the current process's credentials, returning with the mutex still +held if successful. It returns NULL if not successful (out of memory). + +The mutex prevents ptrace() from altering the ptrace state of a process whilst +security checks on credentials construction and changing is taking place as +the ptrace state may alter the outcome, particularly in the case of execve(). + +The new credentials set should be altered appropriately, and any security +checks and hooks done. Both the current and the proposed sets of credentials +are available for this purpose as current_cred() will return the current set +still at this point. + + +When the credential set is ready, it should be committed to the current process +by calling: + + int commit_creds(struct cred *new); + +This will alter various aspects of the credentials and the process, giving the +LSM a chance to do likewise, then it will use rcu_assign_pointer() to actually +commit the new credentials to current->cred, it will release +current->cred_replace_mutex to allow ptrace() to take place, and it will notify +the scheduler and others of the changes. + +This function is guaranteed to return 0, so that it can be tail-called at the +end of such functions as sys_setresuid(). + +Note that this function consumes the caller's reference to the new credentials. +The caller should _not_ call put_cred() on the new credentials afterwards. + +Furthermore, once this function has been called on a new set of credentials, +those credentials may _not_ be changed further. + + +Should the security checks fail or some other error occur after prepare_creds() +has been called, then the following function should be invoked: + + void abort_creds(struct cred *new); + +This releases the lock on current->cred_replace_mutex that prepare_creds() got +and then releases the new credentials. + + +A typical credentials alteration function would look something like this: + + int alter_suid(uid_t suid) + { + struct cred *new; + int ret; + + new = prepare_creds(); + if (!new) + return -ENOMEM; + + new->suid = suid; + ret = security_alter_suid(new); + if (ret < 0) { + abort_creds(new); + return ret; + } + + return commit_creds(new); + } + + +MANAGING CREDENTIALS +-------------------- + +There are some functions to help manage credentials: + + (*) void put_cred(const struct cred *cred); + + This releases a reference to the given set of credentials. If the + reference count reaches zero, the credentials will be scheduled for + destruction by the RCU system. + + (*) const struct cred *get_cred(const struct cred *cred); + + This gets a reference on a live set of credentials, returning a pointer to + that set of credentials. + + (*) struct cred *get_new_cred(struct cred *cred); + + This gets a reference on a set of credentials that is under construction + and is thus still mutable, returning a pointer to that set of credentials. + + +===================== +OPEN FILE CREDENTIALS +===================== + +When a new file is opened, a reference is obtained on the opening task's +credentials and this is attached to the file struct as 'f_cred' in place of +'f_uid' and 'f_gid'. Code that used to access file->f_uid and file->f_gid +should now access file->f_cred->fsuid and file->f_cred->fsgid. + +It is safe to access f_cred without the use of RCU or locking because the +pointer will not change over the lifetime of the file struct, and nor will the +contents of the cred struct pointed to, barring the exceptions listed above +(see the Task Credentials section). + + +======================================= +OVERRIDING THE VFS'S USE OF CREDENTIALS +======================================= + +Under some circumstances it is desirable to override the credentials used by +the VFS, and that can be done by calling into such as vfs_mkdir() with a +different set of credentials. This is done in the following places: + + (*) sys_faccessat(). + + (*) do_coredump(). + + (*) nfs4recover.c. diff --git a/Documentation/security/keys-request-key.txt b/Documentation/security/keys-request-key.txt new file mode 100644 index 000000000000..51987bfecfed --- /dev/null +++ b/Documentation/security/keys-request-key.txt @@ -0,0 +1,202 @@ + =================== + KEY REQUEST SERVICE + =================== + +The key request service is part of the key retention service (refer to +Documentation/security/keys.txt). This document explains more fully how +the requesting algorithm works. + +The process starts by either the kernel requesting a service by calling +request_key*(): + + struct key *request_key(const struct key_type *type, + const char *description, + const char *callout_info); + +or: + + struct key *request_key_with_auxdata(const struct key_type *type, + const char *description, + const char *callout_info, + size_t callout_len, + void *aux); + +or: + + struct key *request_key_async(const struct key_type *type, + const char *description, + const char *callout_info, + size_t callout_len); + +or: + + struct key *request_key_async_with_auxdata(const struct key_type *type, + const char *description, + const char *callout_info, + size_t callout_len, + void *aux); + +Or by userspace invoking the request_key system call: + + key_serial_t request_key(const char *type, + const char *description, + const char *callout_info, + key_serial_t dest_keyring); + +The main difference between the access points is that the in-kernel interface +does not need to link the key to a keyring to prevent it from being immediately +destroyed. The kernel interface returns a pointer directly to the key, and +it's up to the caller to destroy the key. + +The request_key*_with_auxdata() calls are like the in-kernel request_key*() +calls, except that they permit auxiliary data to be passed to the upcaller (the +default is NULL). This is only useful for those key types that define their +own upcall mechanism rather than using /sbin/request-key. + +The two async in-kernel calls may return keys that are still in the process of +being constructed. The two non-async ones will wait for construction to +complete first. + +The userspace interface links the key to a keyring associated with the process +to prevent the key from going away, and returns the serial number of the key to +the caller. + + +The following example assumes that the key types involved don't define their +own upcall mechanisms. If they do, then those should be substituted for the +forking and execution of /sbin/request-key. + + +=========== +THE PROCESS +=========== + +A request proceeds in the following manner: + + (1) Process A calls request_key() [the userspace syscall calls the kernel + interface]. + + (2) request_key() searches the process's subscribed keyrings to see if there's + a suitable key there. If there is, it returns the key. If there isn't, + and callout_info is not set, an error is returned. Otherwise the process + proceeds to the next step. + + (3) request_key() sees that A doesn't have the desired key yet, so it creates + two things: + + (a) An uninstantiated key U of requested type and description. + + (b) An authorisation key V that refers to key U and notes that process A + is the context in which key U should be instantiated and secured, and + from which associated key requests may be satisfied. + + (4) request_key() then forks and executes /sbin/request-key with a new session + keyring that contains a link to auth key V. + + (5) /sbin/request-key assumes the authority associated with key U. + + (6) /sbin/request-key execs an appropriate program to perform the actual + instantiation. + + (7) The program may want to access another key from A's context (say a + Kerberos TGT key). It just requests the appropriate key, and the keyring + search notes that the session keyring has auth key V in its bottom level. + + This will permit it to then search the keyrings of process A with the + UID, GID, groups and security info of process A as if it was process A, + and come up with key W. + + (8) The program then does what it must to get the data with which to + instantiate key U, using key W as a reference (perhaps it contacts a + Kerberos server using the TGT) and then instantiates key U. + + (9) Upon instantiating key U, auth key V is automatically revoked so that it + may not be used again. + +(10) The program then exits 0 and request_key() deletes key V and returns key + U to the caller. + +This also extends further. If key W (step 7 above) didn't exist, key W would +be created uninstantiated, another auth key (X) would be created (as per step +3) and another copy of /sbin/request-key spawned (as per step 4); but the +context specified by auth key X will still be process A, as it was in auth key +V. + +This is because process A's keyrings can't simply be attached to +/sbin/request-key at the appropriate places because (a) execve will discard two +of them, and (b) it requires the same UID/GID/Groups all the way through. + + +==================================== +NEGATIVE INSTANTIATION AND REJECTION +==================================== + +Rather than instantiating a key, it is possible for the possessor of an +authorisation key to negatively instantiate a key that's under construction. +This is a short duration placeholder that causes any attempt at re-requesting +the key whilst it exists to fail with error ENOKEY if negated or the specified +error if rejected. + +This is provided to prevent excessive repeated spawning of /sbin/request-key +processes for a key that will never be obtainable. + +Should the /sbin/request-key process exit anything other than 0 or die on a +signal, the key under construction will be automatically negatively +instantiated for a short amount of time. + + +==================== +THE SEARCH ALGORITHM +==================== + +A search of any particular keyring proceeds in the following fashion: + + (1) When the key management code searches for a key (keyring_search_aux) it + firstly calls key_permission(SEARCH) on the keyring it's starting with, + if this denies permission, it doesn't search further. + + (2) It considers all the non-keyring keys within that keyring and, if any key + matches the criteria specified, calls key_permission(SEARCH) on it to see + if the key is allowed to be found. If it is, that key is returned; if + not, the search continues, and the error code is retained if of higher + priority than the one currently set. + + (3) It then considers all the keyring-type keys in the keyring it's currently + searching. It calls key_permission(SEARCH) on each keyring, and if this + grants permission, it recurses, executing steps (2) and (3) on that + keyring. + +The process stops immediately a valid key is found with permission granted to +use it. Any error from a previous match attempt is discarded and the key is +returned. + +When search_process_keyrings() is invoked, it performs the following searches +until one succeeds: + + (1) If extant, the process's thread keyring is searched. + + (2) If extant, the process's process keyring is searched. + + (3) The process's session keyring is searched. + + (4) If the process has assumed the authority associated with a request_key() + authorisation key then: + + (a) If extant, the calling process's thread keyring is searched. + + (b) If extant, the calling process's process keyring is searched. + + (c) The calling process's session keyring is searched. + +The moment one succeeds, all pending errors are discarded and the found key is +returned. + +Only if all these fail does the whole thing fail with the highest priority +error. Note that several errors may have come from LSM. + +The error priority is: + + EKEYREVOKED > EKEYEXPIRED > ENOKEY + +EACCES/EPERM are only returned on a direct search of a specific keyring where +the basal keyring does not grant Search permission. diff --git a/Documentation/keys-trusted-encrypted.txt b/Documentation/security/keys-trusted-encrypted.txt similarity index 100% rename from Documentation/keys-trusted-encrypted.txt rename to Documentation/security/keys-trusted-encrypted.txt diff --git a/Documentation/security/keys.txt b/Documentation/security/keys.txt new file mode 100644 index 000000000000..4d75931d2d79 --- /dev/null +++ b/Documentation/security/keys.txt @@ -0,0 +1,1290 @@ + ============================ + KERNEL KEY RETENTION SERVICE + ============================ + +This service allows cryptographic keys, authentication tokens, cross-domain +user mappings, and similar to be cached in the kernel for the use of +filesystems and other kernel services. + +Keyrings are permitted; these are a special type of key that can hold links to +other keys. Processes each have three standard keyring subscriptions that a +kernel service can search for relevant keys. + +The key service can be configured on by enabling: + + "Security options"/"Enable access key retention support" (CONFIG_KEYS) + +This document has the following sections: + + - Key overview + - Key service overview + - Key access permissions + - SELinux support + - New procfs files + - Userspace system call interface + - Kernel services + - Notes on accessing payload contents + - Defining a key type + - Request-key callback service + - Garbage collection + + +============ +KEY OVERVIEW +============ + +In this context, keys represent units of cryptographic data, authentication +tokens, keyrings, etc.. These are represented in the kernel by struct key. + +Each key has a number of attributes: + + - A serial number. + - A type. + - A description (for matching a key in a search). + - Access control information. + - An expiry time. + - A payload. + - State. + + + (*) Each key is issued a serial number of type key_serial_t that is unique for + the lifetime of that key. All serial numbers are positive non-zero 32-bit + integers. + + Userspace programs can use a key's serial numbers as a way to gain access + to it, subject to permission checking. + + (*) Each key is of a defined "type". Types must be registered inside the + kernel by a kernel service (such as a filesystem) before keys of that type + can be added or used. Userspace programs cannot define new types directly. + + Key types are represented in the kernel by struct key_type. This defines a + number of operations that can be performed on a key of that type. + + Should a type be removed from the system, all the keys of that type will + be invalidated. + + (*) Each key has a description. This should be a printable string. The key + type provides an operation to perform a match between the description on a + key and a criterion string. + + (*) Each key has an owner user ID, a group ID and a permissions mask. These + are used to control what a process may do to a key from userspace, and + whether a kernel service will be able to find the key. + + (*) Each key can be set to expire at a specific time by the key type's + instantiation function. Keys can also be immortal. + + (*) Each key can have a payload. This is a quantity of data that represent the + actual "key". In the case of a keyring, this is a list of keys to which + the keyring links; in the case of a user-defined key, it's an arbitrary + blob of data. + + Having a payload is not required; and the payload can, in fact, just be a + value stored in the struct key itself. + + When a key is instantiated, the key type's instantiation function is + called with a blob of data, and that then creates the key's payload in + some way. + + Similarly, when userspace wants to read back the contents of the key, if + permitted, another key type operation will be called to convert the key's + attached payload back into a blob of data. + + (*) Each key can be in one of a number of basic states: + + (*) Uninstantiated. The key exists, but does not have any data attached. + Keys being requested from userspace will be in this state. + + (*) Instantiated. This is the normal state. The key is fully formed, and + has data attached. + + (*) Negative. This is a relatively short-lived state. The key acts as a + note saying that a previous call out to userspace failed, and acts as + a throttle on key lookups. A negative key can be updated to a normal + state. + + (*) Expired. Keys can have lifetimes set. If their lifetime is exceeded, + they traverse to this state. An expired key can be updated back to a + normal state. + + (*) Revoked. A key is put in this state by userspace action. It can't be + found or operated upon (apart from by unlinking it). + + (*) Dead. The key's type was unregistered, and so the key is now useless. + +Keys in the last three states are subject to garbage collection. See the +section on "Garbage collection". + + +==================== +KEY SERVICE OVERVIEW +==================== + +The key service provides a number of features besides keys: + + (*) The key service defines two special key types: + + (+) "keyring" + + Keyrings are special keys that contain a list of other keys. Keyring + lists can be modified using various system calls. Keyrings should not + be given a payload when created. + + (+) "user" + + A key of this type has a description and a payload that are arbitrary + blobs of data. These can be created, updated and read by userspace, + and aren't intended for use by kernel services. + + (*) Each process subscribes to three keyrings: a thread-specific keyring, a + process-specific keyring, and a session-specific keyring. + + The thread-specific keyring is discarded from the child when any sort of + clone, fork, vfork or execve occurs. A new keyring is created only when + required. + + The process-specific keyring is replaced with an empty one in the child on + clone, fork, vfork unless CLONE_THREAD is supplied, in which case it is + shared. execve also discards the process's process keyring and creates a + new one. + + The session-specific keyring is persistent across clone, fork, vfork and + execve, even when the latter executes a set-UID or set-GID binary. A + process can, however, replace its current session keyring with a new one + by using PR_JOIN_SESSION_KEYRING. It is permitted to request an anonymous + new one, or to attempt to create or join one of a specific name. + + The ownership of the thread keyring changes when the real UID and GID of + the thread changes. + + (*) Each user ID resident in the system holds two special keyrings: a user + specific keyring and a default user session keyring. The default session + keyring is initialised with a link to the user-specific keyring. + + When a process changes its real UID, if it used to have no session key, it + will be subscribed to the default session key for the new UID. + + If a process attempts to access its session key when it doesn't have one, + it will be subscribed to the default for its current UID. + + (*) Each user has two quotas against which the keys they own are tracked. One + limits the total number of keys and keyrings, the other limits the total + amount of description and payload space that can be consumed. + + The user can view information on this and other statistics through procfs + files. The root user may also alter the quota limits through sysctl files + (see the section "New procfs files"). + + Process-specific and thread-specific keyrings are not counted towards a + user's quota. + + If a system call that modifies a key or keyring in some way would put the + user over quota, the operation is refused and error EDQUOT is returned. + + (*) There's a system call interface by which userspace programs can create and + manipulate keys and keyrings. + + (*) There's a kernel interface by which services can register types and search + for keys. + + (*) There's a way for the a search done from the kernel to call back to + userspace to request a key that can't be found in a process's keyrings. + + (*) An optional filesystem is available through which the key database can be + viewed and manipulated. + + +====================== +KEY ACCESS PERMISSIONS +====================== + +Keys have an owner user ID, a group access ID, and a permissions mask. The mask +has up to eight bits each for possessor, user, group and other access. Only +six of each set of eight bits are defined. These permissions granted are: + + (*) View + + This permits a key or keyring's attributes to be viewed - including key + type and description. + + (*) Read + + This permits a key's payload to be viewed or a keyring's list of linked + keys. + + (*) Write + + This permits a key's payload to be instantiated or updated, or it allows a + link to be added to or removed from a keyring. + + (*) Search + + This permits keyrings to be searched and keys to be found. Searches can + only recurse into nested keyrings that have search permission set. + + (*) Link + + This permits a key or keyring to be linked to. To create a link from a + keyring to a key, a process must have Write permission on the keyring and + Link permission on the key. + + (*) Set Attribute + + This permits a key's UID, GID and permissions mask to be changed. + +For changing the ownership, group ID or permissions mask, being the owner of +the key or having the sysadmin capability is sufficient. + + +=============== +SELINUX SUPPORT +=============== + +The security class "key" has been added to SELinux so that mandatory access +controls can be applied to keys created within various contexts. This support +is preliminary, and is likely to change quite significantly in the near future. +Currently, all of the basic permissions explained above are provided in SELinux +as well; SELinux is simply invoked after all basic permission checks have been +performed. + +The value of the file /proc/self/attr/keycreate influences the labeling of +newly-created keys. If the contents of that file correspond to an SELinux +security context, then the key will be assigned that context. Otherwise, the +key will be assigned the current context of the task that invoked the key +creation request. Tasks must be granted explicit permission to assign a +particular context to newly-created keys, using the "create" permission in the +key security class. + +The default keyrings associated with users will be labeled with the default +context of the user if and only if the login programs have been instrumented to +properly initialize keycreate during the login process. Otherwise, they will +be labeled with the context of the login program itself. + +Note, however, that the default keyrings associated with the root user are +labeled with the default kernel context, since they are created early in the +boot process, before root has a chance to log in. + +The keyrings associated with new threads are each labeled with the context of +their associated thread, and both session and process keyrings are handled +similarly. + + +================ +NEW PROCFS FILES +================ + +Two files have been added to procfs by which an administrator can find out +about the status of the key service: + + (*) /proc/keys + + This lists the keys that are currently viewable by the task reading the + file, giving information about their type, description and permissions. + It is not possible to view the payload of the key this way, though some + information about it may be given. + + The only keys included in the list are those that grant View permission to + the reading process whether or not it possesses them. Note that LSM + security checks are still performed, and may further filter out keys that + the current process is not authorised to view. + + The contents of the file look like this: + + SERIAL FLAGS USAGE EXPY PERM UID GID TYPE DESCRIPTION: SUMMARY + 00000001 I----- 39 perm 1f3f0000 0 0 keyring _uid_ses.0: 1/4 + 00000002 I----- 2 perm 1f3f0000 0 0 keyring _uid.0: empty + 00000007 I----- 1 perm 1f3f0000 0 0 keyring _pid.1: empty + 0000018d I----- 1 perm 1f3f0000 0 0 keyring _pid.412: empty + 000004d2 I--Q-- 1 perm 1f3f0000 32 -1 keyring _uid.32: 1/4 + 000004d3 I--Q-- 3 perm 1f3f0000 32 -1 keyring _uid_ses.32: empty + 00000892 I--QU- 1 perm 1f000000 0 0 user metal:copper: 0 + 00000893 I--Q-N 1 35s 1f3f0000 0 0 user metal:silver: 0 + 00000894 I--Q-- 1 10h 003f0000 0 0 user metal:gold: 0 + + The flags are: + + I Instantiated + R Revoked + D Dead + Q Contributes to user's quota + U Under construction by callback to userspace + N Negative key + + This file must be enabled at kernel configuration time as it allows anyone + to list the keys database. + + (*) /proc/key-users + + This file lists the tracking data for each user that has at least one key + on the system. Such data includes quota information and statistics: + + [root@andromeda root]# cat /proc/key-users + 0: 46 45/45 1/100 13/10000 + 29: 2 2/2 2/100 40/10000 + 32: 2 2/2 2/100 40/10000 + 38: 2 2/2 2/100 40/10000 + + The format of each line is + : User ID to which this applies + Structure refcount + / Total number of keys and number instantiated + / Key count quota + / Key size quota + + +Four new sysctl files have been added also for the purpose of controlling the +quota limits on keys: + + (*) /proc/sys/kernel/keys/root_maxkeys + /proc/sys/kernel/keys/root_maxbytes + + These files hold the maximum number of keys that root may have and the + maximum total number of bytes of data that root may have stored in those + keys. + + (*) /proc/sys/kernel/keys/maxkeys + /proc/sys/kernel/keys/maxbytes + + These files hold the maximum number of keys that each non-root user may + have and the maximum total number of bytes of data that each of those + users may have stored in their keys. + +Root may alter these by writing each new limit as a decimal number string to +the appropriate file. + + +=============================== +USERSPACE SYSTEM CALL INTERFACE +=============================== + +Userspace can manipulate keys directly through three new syscalls: add_key, +request_key and keyctl. The latter provides a number of functions for +manipulating keys. + +When referring to a key directly, userspace programs should use the key's +serial number (a positive 32-bit integer). However, there are some special +values available for referring to special keys and keyrings that relate to the +process making the call: + + CONSTANT VALUE KEY REFERENCED + ============================== ====== =========================== + KEY_SPEC_THREAD_KEYRING -1 thread-specific keyring + KEY_SPEC_PROCESS_KEYRING -2 process-specific keyring + KEY_SPEC_SESSION_KEYRING -3 session-specific keyring + KEY_SPEC_USER_KEYRING -4 UID-specific keyring + KEY_SPEC_USER_SESSION_KEYRING -5 UID-session keyring + KEY_SPEC_GROUP_KEYRING -6 GID-specific keyring + KEY_SPEC_REQKEY_AUTH_KEY -7 assumed request_key() + authorisation key + + +The main syscalls are: + + (*) Create a new key of given type, description and payload and add it to the + nominated keyring: + + key_serial_t add_key(const char *type, const char *desc, + const void *payload, size_t plen, + key_serial_t keyring); + + If a key of the same type and description as that proposed already exists + in the keyring, this will try to update it with the given payload, or it + will return error EEXIST if that function is not supported by the key + type. The process must also have permission to write to the key to be able + to update it. The new key will have all user permissions granted and no + group or third party permissions. + + Otherwise, this will attempt to create a new key of the specified type and + description, and to instantiate it with the supplied payload and attach it + to the keyring. In this case, an error will be generated if the process + does not have permission to write to the keyring. + + The payload is optional, and the pointer can be NULL if not required by + the type. The payload is plen in size, and plen can be zero for an empty + payload. + + A new keyring can be generated by setting type "keyring", the keyring name + as the description (or NULL) and setting the payload to NULL. + + User defined keys can be created by specifying type "user". It is + recommended that a user defined key's description by prefixed with a type + ID and a colon, such as "krb5tgt:" for a Kerberos 5 ticket granting + ticket. + + Any other type must have been registered with the kernel in advance by a + kernel service such as a filesystem. + + The ID of the new or updated key is returned if successful. + + + (*) Search the process's keyrings for a key, potentially calling out to + userspace to create it. + + key_serial_t request_key(const char *type, const char *description, + const char *callout_info, + key_serial_t dest_keyring); + + This function searches all the process's keyrings in the order thread, + process, session for a matching key. This works very much like + KEYCTL_SEARCH, including the optional attachment of the discovered key to + a keyring. + + If a key cannot be found, and if callout_info is not NULL, then + /sbin/request-key will be invoked in an attempt to obtain a key. The + callout_info string will be passed as an argument to the program. + + See also Documentation/security/keys-request-key.txt. + + +The keyctl syscall functions are: + + (*) Map a special key ID to a real key ID for this process: + + key_serial_t keyctl(KEYCTL_GET_KEYRING_ID, key_serial_t id, + int create); + + The special key specified by "id" is looked up (with the key being created + if necessary) and the ID of the key or keyring thus found is returned if + it exists. + + If the key does not yet exist, the key will be created if "create" is + non-zero; and the error ENOKEY will be returned if "create" is zero. + + + (*) Replace the session keyring this process subscribes to with a new one: + + key_serial_t keyctl(KEYCTL_JOIN_SESSION_KEYRING, const char *name); + + If name is NULL, an anonymous keyring is created attached to the process + as its session keyring, displacing the old session keyring. + + If name is not NULL, if a keyring of that name exists, the process + attempts to attach it as the session keyring, returning an error if that + is not permitted; otherwise a new keyring of that name is created and + attached as the session keyring. + + To attach to a named keyring, the keyring must have search permission for + the process's ownership. + + The ID of the new session keyring is returned if successful. + + + (*) Update the specified key: + + long keyctl(KEYCTL_UPDATE, key_serial_t key, const void *payload, + size_t plen); + + This will try to update the specified key with the given payload, or it + will return error EOPNOTSUPP if that function is not supported by the key + type. The process must also have permission to write to the key to be able + to update it. + + The payload is of length plen, and may be absent or empty as for + add_key(). + + + (*) Revoke a key: + + long keyctl(KEYCTL_REVOKE, key_serial_t key); + + This makes a key unavailable for further operations. Further attempts to + use the key will be met with error EKEYREVOKED, and the key will no longer + be findable. + + + (*) Change the ownership of a key: + + long keyctl(KEYCTL_CHOWN, key_serial_t key, uid_t uid, gid_t gid); + + This function permits a key's owner and group ID to be changed. Either one + of uid or gid can be set to -1 to suppress that change. + + Only the superuser can change a key's owner to something other than the + key's current owner. Similarly, only the superuser can change a key's + group ID to something other than the calling process's group ID or one of + its group list members. + + + (*) Change the permissions mask on a key: + + long keyctl(KEYCTL_SETPERM, key_serial_t key, key_perm_t perm); + + This function permits the owner of a key or the superuser to change the + permissions mask on a key. + + Only bits the available bits are permitted; if any other bits are set, + error EINVAL will be returned. + + + (*) Describe a key: + + long keyctl(KEYCTL_DESCRIBE, key_serial_t key, char *buffer, + size_t buflen); + + This function returns a summary of the key's attributes (but not its + payload data) as a string in the buffer provided. + + Unless there's an error, it always returns the amount of data it could + produce, even if that's too big for the buffer, but it won't copy more + than requested to userspace. If the buffer pointer is NULL then no copy + will take place. + + A process must have view permission on the key for this function to be + successful. + + If successful, a string is placed in the buffer in the following format: + + ;;;; + + Where type and description are strings, uid and gid are decimal, and perm + is hexadecimal. A NUL character is included at the end of the string if + the buffer is sufficiently big. + + This can be parsed with + + sscanf(buffer, "%[^;];%d;%d;%o;%s", type, &uid, &gid, &mode, desc); + + + (*) Clear out a keyring: + + long keyctl(KEYCTL_CLEAR, key_serial_t keyring); + + This function clears the list of keys attached to a keyring. The calling + process must have write permission on the keyring, and it must be a + keyring (or else error ENOTDIR will result). + + + (*) Link a key into a keyring: + + long keyctl(KEYCTL_LINK, key_serial_t keyring, key_serial_t key); + + This function creates a link from the keyring to the key. The process must + have write permission on the keyring and must have link permission on the + key. + + Should the keyring not be a keyring, error ENOTDIR will result; and if the + keyring is full, error ENFILE will result. + + The link procedure checks the nesting of the keyrings, returning ELOOP if + it appears too deep or EDEADLK if the link would introduce a cycle. + + Any links within the keyring to keys that match the new key in terms of + type and description will be discarded from the keyring as the new one is + added. + + + (*) Unlink a key or keyring from another keyring: + + long keyctl(KEYCTL_UNLINK, key_serial_t keyring, key_serial_t key); + + This function looks through the keyring for the first link to the + specified key, and removes it if found. Subsequent links to that key are + ignored. The process must have write permission on the keyring. + + If the keyring is not a keyring, error ENOTDIR will result; and if the key + is not present, error ENOENT will be the result. + + + (*) Search a keyring tree for a key: + + key_serial_t keyctl(KEYCTL_SEARCH, key_serial_t keyring, + const char *type, const char *description, + key_serial_t dest_keyring); + + This searches the keyring tree headed by the specified keyring until a key + is found that matches the type and description criteria. Each keyring is + checked for keys before recursion into its children occurs. + + The process must have search permission on the top level keyring, or else + error EACCES will result. Only keyrings that the process has search + permission on will be recursed into, and only keys and keyrings for which + a process has search permission can be matched. If the specified keyring + is not a keyring, ENOTDIR will result. + + If the search succeeds, the function will attempt to link the found key + into the destination keyring if one is supplied (non-zero ID). All the + constraints applicable to KEYCTL_LINK apply in this case too. + + Error ENOKEY, EKEYREVOKED or EKEYEXPIRED will be returned if the search + fails. On success, the resulting key ID will be returned. + + + (*) Read the payload data from a key: + + long keyctl(KEYCTL_READ, key_serial_t keyring, char *buffer, + size_t buflen); + + This function attempts to read the payload data from the specified key + into the buffer. The process must have read permission on the key to + succeed. + + The returned data will be processed for presentation by the key type. For + instance, a keyring will return an array of key_serial_t entries + representing the IDs of all the keys to which it is subscribed. The user + defined key type will return its data as is. If a key type does not + implement this function, error EOPNOTSUPP will result. + + As much of the data as can be fitted into the buffer will be copied to + userspace if the buffer pointer is not NULL. + + On a successful return, the function will always return the amount of data + available rather than the amount copied. + + + (*) Instantiate a partially constructed key. + + long keyctl(KEYCTL_INSTANTIATE, key_serial_t key, + const void *payload, size_t plen, + key_serial_t keyring); + long keyctl(KEYCTL_INSTANTIATE_IOV, key_serial_t key, + const struct iovec *payload_iov, unsigned ioc, + key_serial_t keyring); + + If the kernel calls back to userspace to complete the instantiation of a + key, userspace should use this call to supply data for the key before the + invoked process returns, or else the key will be marked negative + automatically. + + The process must have write access on the key to be able to instantiate + it, and the key must be uninstantiated. + + If a keyring is specified (non-zero), the key will also be linked into + that keyring, however all the constraints applying in KEYCTL_LINK apply in + this case too. + + The payload and plen arguments describe the payload data as for add_key(). + + The payload_iov and ioc arguments describe the payload data in an iovec + array instead of a single buffer. + + + (*) Negatively instantiate a partially constructed key. + + long keyctl(KEYCTL_NEGATE, key_serial_t key, + unsigned timeout, key_serial_t keyring); + long keyctl(KEYCTL_REJECT, key_serial_t key, + unsigned timeout, unsigned error, key_serial_t keyring); + + If the kernel calls back to userspace to complete the instantiation of a + key, userspace should use this call mark the key as negative before the + invoked process returns if it is unable to fulfil the request. + + The process must have write access on the key to be able to instantiate + it, and the key must be uninstantiated. + + If a keyring is specified (non-zero), the key will also be linked into + that keyring, however all the constraints applying in KEYCTL_LINK apply in + this case too. + + If the key is rejected, future searches for it will return the specified + error code until the rejected key expires. Negating the key is the same + as rejecting the key with ENOKEY as the error code. + + + (*) Set the default request-key destination keyring. + + long keyctl(KEYCTL_SET_REQKEY_KEYRING, int reqkey_defl); + + This sets the default keyring to which implicitly requested keys will be + attached for this thread. reqkey_defl should be one of these constants: + + CONSTANT VALUE NEW DEFAULT KEYRING + ====================================== ====== ======================= + KEY_REQKEY_DEFL_NO_CHANGE -1 No change + KEY_REQKEY_DEFL_DEFAULT 0 Default[1] + KEY_REQKEY_DEFL_THREAD_KEYRING 1 Thread keyring + KEY_REQKEY_DEFL_PROCESS_KEYRING 2 Process keyring + KEY_REQKEY_DEFL_SESSION_KEYRING 3 Session keyring + KEY_REQKEY_DEFL_USER_KEYRING 4 User keyring + KEY_REQKEY_DEFL_USER_SESSION_KEYRING 5 User session keyring + KEY_REQKEY_DEFL_GROUP_KEYRING 6 Group keyring + + The old default will be returned if successful and error EINVAL will be + returned if reqkey_defl is not one of the above values. + + The default keyring can be overridden by the keyring indicated to the + request_key() system call. + + Note that this setting is inherited across fork/exec. + + [1] The default is: the thread keyring if there is one, otherwise + the process keyring if there is one, otherwise the session keyring if + there is one, otherwise the user default session keyring. + + + (*) Set the timeout on a key. + + long keyctl(KEYCTL_SET_TIMEOUT, key_serial_t key, unsigned timeout); + + This sets or clears the timeout on a key. The timeout can be 0 to clear + the timeout or a number of seconds to set the expiry time that far into + the future. + + The process must have attribute modification access on a key to set its + timeout. Timeouts may not be set with this function on negative, revoked + or expired keys. + + + (*) Assume the authority granted to instantiate a key + + long keyctl(KEYCTL_ASSUME_AUTHORITY, key_serial_t key); + + This assumes or divests the authority required to instantiate the + specified key. Authority can only be assumed if the thread has the + authorisation key associated with the specified key in its keyrings + somewhere. + + Once authority is assumed, searches for keys will also search the + requester's keyrings using the requester's security label, UID, GID and + groups. + + If the requested authority is unavailable, error EPERM will be returned, + likewise if the authority has been revoked because the target key is + already instantiated. + + If the specified key is 0, then any assumed authority will be divested. + + The assumed authoritative key is inherited across fork and exec. + + + (*) Get the LSM security context attached to a key. + + long keyctl(KEYCTL_GET_SECURITY, key_serial_t key, char *buffer, + size_t buflen) + + This function returns a string that represents the LSM security context + attached to a key in the buffer provided. + + Unless there's an error, it always returns the amount of data it could + produce, even if that's too big for the buffer, but it won't copy more + than requested to userspace. If the buffer pointer is NULL then no copy + will take place. + + A NUL character is included at the end of the string if the buffer is + sufficiently big. This is included in the returned count. If no LSM is + in force then an empty string will be returned. + + A process must have view permission on the key for this function to be + successful. + + + (*) Install the calling process's session keyring on its parent. + + long keyctl(KEYCTL_SESSION_TO_PARENT); + + This functions attempts to install the calling process's session keyring + on to the calling process's parent, replacing the parent's current session + keyring. + + The calling process must have the same ownership as its parent, the + keyring must have the same ownership as the calling process, the calling + process must have LINK permission on the keyring and the active LSM module + mustn't deny permission, otherwise error EPERM will be returned. + + Error ENOMEM will be returned if there was insufficient memory to complete + the operation, otherwise 0 will be returned to indicate success. + + The keyring will be replaced next time the parent process leaves the + kernel and resumes executing userspace. + + +=============== +KERNEL SERVICES +=============== + +The kernel services for key management are fairly simple to deal with. They can +be broken down into two areas: keys and key types. + +Dealing with keys is fairly straightforward. Firstly, the kernel service +registers its type, then it searches for a key of that type. It should retain +the key as long as it has need of it, and then it should release it. For a +filesystem or device file, a search would probably be performed during the open +call, and the key released upon close. How to deal with conflicting keys due to +two different users opening the same file is left to the filesystem author to +solve. + +To access the key manager, the following header must be #included: + + + +Specific key types should have a header file under include/keys/ that should be +used to access that type. For keys of type "user", for example, that would be: + + + +Note that there are two different types of pointers to keys that may be +encountered: + + (*) struct key * + + This simply points to the key structure itself. Key structures will be at + least four-byte aligned. + + (*) key_ref_t + + This is equivalent to a struct key *, but the least significant bit is set + if the caller "possesses" the key. By "possession" it is meant that the + calling processes has a searchable link to the key from one of its + keyrings. There are three functions for dealing with these: + + key_ref_t make_key_ref(const struct key *key, + unsigned long possession); + + struct key *key_ref_to_ptr(const key_ref_t key_ref); + + unsigned long is_key_possessed(const key_ref_t key_ref); + + The first function constructs a key reference from a key pointer and + possession information (which must be 0 or 1 and not any other value). + + The second function retrieves the key pointer from a reference and the + third retrieves the possession flag. + +When accessing a key's payload contents, certain precautions must be taken to +prevent access vs modification races. See the section "Notes on accessing +payload contents" for more information. + +(*) To search for a key, call: + + struct key *request_key(const struct key_type *type, + const char *description, + const char *callout_info); + + This is used to request a key or keyring with a description that matches + the description specified according to the key type's match function. This + permits approximate matching to occur. If callout_string is not NULL, then + /sbin/request-key will be invoked in an attempt to obtain the key from + userspace. In that case, callout_string will be passed as an argument to + the program. + + Should the function fail error ENOKEY, EKEYEXPIRED or EKEYREVOKED will be + returned. + + If successful, the key will have been attached to the default keyring for + implicitly obtained request-key keys, as set by KEYCTL_SET_REQKEY_KEYRING. + + See also Documentation/security/keys-request-key.txt. + + +(*) To search for a key, passing auxiliary data to the upcaller, call: + + struct key *request_key_with_auxdata(const struct key_type *type, + const char *description, + const void *callout_info, + size_t callout_len, + void *aux); + + This is identical to request_key(), except that the auxiliary data is + passed to the key_type->request_key() op if it exists, and the callout_info + is a blob of length callout_len, if given (the length may be 0). + + +(*) A key can be requested asynchronously by calling one of: + + struct key *request_key_async(const struct key_type *type, + const char *description, + const void *callout_info, + size_t callout_len); + + or: + + struct key *request_key_async_with_auxdata(const struct key_type *type, + const char *description, + const char *callout_info, + size_t callout_len, + void *aux); + + which are asynchronous equivalents of request_key() and + request_key_with_auxdata() respectively. + + These two functions return with the key potentially still under + construction. To wait for construction completion, the following should be + called: + + int wait_for_key_construction(struct key *key, bool intr); + + The function will wait for the key to finish being constructed and then + invokes key_validate() to return an appropriate value to indicate the state + of the key (0 indicates the key is usable). + + If intr is true, then the wait can be interrupted by a signal, in which + case error ERESTARTSYS will be returned. + + +(*) When it is no longer required, the key should be released using: + + void key_put(struct key *key); + + Or: + + void key_ref_put(key_ref_t key_ref); + + These can be called from interrupt context. If CONFIG_KEYS is not set then + the argument will not be parsed. + + +(*) Extra references can be made to a key by calling the following function: + + struct key *key_get(struct key *key); + + These need to be disposed of by calling key_put() when they've been + finished with. The key pointer passed in will be returned. If the pointer + is NULL or CONFIG_KEYS is not set then the key will not be dereferenced and + no increment will take place. + + +(*) A key's serial number can be obtained by calling: + + key_serial_t key_serial(struct key *key); + + If key is NULL or if CONFIG_KEYS is not set then 0 will be returned (in the + latter case without parsing the argument). + + +(*) If a keyring was found in the search, this can be further searched by: + + key_ref_t keyring_search(key_ref_t keyring_ref, + const struct key_type *type, + const char *description) + + This searches the keyring tree specified for a matching key. Error ENOKEY + is returned upon failure (use IS_ERR/PTR_ERR to determine). If successful, + the returned key will need to be released. + + The possession attribute from the keyring reference is used to control + access through the permissions mask and is propagated to the returned key + reference pointer if successful. + + +(*) To check the validity of a key, this function can be called: + + int validate_key(struct key *key); + + This checks that the key in question hasn't expired or and hasn't been + revoked. Should the key be invalid, error EKEYEXPIRED or EKEYREVOKED will + be returned. If the key is NULL or if CONFIG_KEYS is not set then 0 will be + returned (in the latter case without parsing the argument). + + +(*) To register a key type, the following function should be called: + + int register_key_type(struct key_type *type); + + This will return error EEXIST if a type of the same name is already + present. + + +(*) To unregister a key type, call: + + void unregister_key_type(struct key_type *type); + + +Under some circumstances, it may be desirable to deal with a bundle of keys. +The facility provides access to the keyring type for managing such a bundle: + + struct key_type key_type_keyring; + +This can be used with a function such as request_key() to find a specific +keyring in a process's keyrings. A keyring thus found can then be searched +with keyring_search(). Note that it is not possible to use request_key() to +search a specific keyring, so using keyrings in this way is of limited utility. + + +=================================== +NOTES ON ACCESSING PAYLOAD CONTENTS +=================================== + +The simplest payload is just a number in key->payload.value. In this case, +there's no need to indulge in RCU or locking when accessing the payload. + +More complex payload contents must be allocated and a pointer to them set in +key->payload.data. One of the following ways must be selected to access the +data: + + (1) Unmodifiable key type. + + If the key type does not have a modify method, then the key's payload can + be accessed without any form of locking, provided that it's known to be + instantiated (uninstantiated keys cannot be "found"). + + (2) The key's semaphore. + + The semaphore could be used to govern access to the payload and to control + the payload pointer. It must be write-locked for modifications and would + have to be read-locked for general access. The disadvantage of doing this + is that the accessor may be required to sleep. + + (3) RCU. + + RCU must be used when the semaphore isn't already held; if the semaphore + is held then the contents can't change under you unexpectedly as the + semaphore must still be used to serialise modifications to the key. The + key management code takes care of this for the key type. + + However, this means using: + + rcu_read_lock() ... rcu_dereference() ... rcu_read_unlock() + + to read the pointer, and: + + rcu_dereference() ... rcu_assign_pointer() ... call_rcu() + + to set the pointer and dispose of the old contents after a grace period. + Note that only the key type should ever modify a key's payload. + + Furthermore, an RCU controlled payload must hold a struct rcu_head for the + use of call_rcu() and, if the payload is of variable size, the length of + the payload. key->datalen cannot be relied upon to be consistent with the + payload just dereferenced if the key's semaphore is not held. + + +=================== +DEFINING A KEY TYPE +=================== + +A kernel service may want to define its own key type. For instance, an AFS +filesystem might want to define a Kerberos 5 ticket key type. To do this, it +author fills in a key_type struct and registers it with the system. + +Source files that implement key types should include the following header file: + + + +The structure has a number of fields, some of which are mandatory: + + (*) const char *name + + The name of the key type. This is used to translate a key type name + supplied by userspace into a pointer to the structure. + + + (*) size_t def_datalen + + This is optional - it supplies the default payload data length as + contributed to the quota. If the key type's payload is always or almost + always the same size, then this is a more efficient way to do things. + + The data length (and quota) on a particular key can always be changed + during instantiation or update by calling: + + int key_payload_reserve(struct key *key, size_t datalen); + + With the revised data length. Error EDQUOT will be returned if this is not + viable. + + + (*) int (*vet_description)(const char *description); + + This optional method is called to vet a key description. If the key type + doesn't approve of the key description, it may return an error, otherwise + it should return 0. + + + (*) int (*instantiate)(struct key *key, const void *data, size_t datalen); + + This method is called to attach a payload to a key during construction. + The payload attached need not bear any relation to the data passed to this + function. + + If the amount of data attached to the key differs from the size in + keytype->def_datalen, then key_payload_reserve() should be called. + + This method does not have to lock the key in order to attach a payload. + The fact that KEY_FLAG_INSTANTIATED is not set in key->flags prevents + anything else from gaining access to the key. + + It is safe to sleep in this method. + + + (*) int (*update)(struct key *key, const void *data, size_t datalen); + + If this type of key can be updated, then this method should be provided. + It is called to update a key's payload from the blob of data provided. + + key_payload_reserve() should be called if the data length might change + before any changes are actually made. Note that if this succeeds, the type + is committed to changing the key because it's already been altered, so all + memory allocation must be done first. + + The key will have its semaphore write-locked before this method is called, + but this only deters other writers; any changes to the key's payload must + be made under RCU conditions, and call_rcu() must be used to dispose of + the old payload. + + key_payload_reserve() should be called before the changes are made, but + after all allocations and other potentially failing function calls are + made. + + It is safe to sleep in this method. + + + (*) int (*match)(const struct key *key, const void *desc); + + This method is called to match a key against a description. It should + return non-zero if the two match, zero if they don't. + + This method should not need to lock the key in any way. The type and + description can be considered invariant, and the payload should not be + accessed (the key may not yet be instantiated). + + It is not safe to sleep in this method; the caller may hold spinlocks. + + + (*) void (*revoke)(struct key *key); + + This method is optional. It is called to discard part of the payload + data upon a key being revoked. The caller will have the key semaphore + write-locked. + + It is safe to sleep in this method, though care should be taken to avoid + a deadlock against the key semaphore. + + + (*) void (*destroy)(struct key *key); + + This method is optional. It is called to discard the payload data on a key + when it is being destroyed. + + This method does not need to lock the key to access the payload; it can + consider the key as being inaccessible at this time. Note that the key's + type may have been changed before this function is called. + + It is not safe to sleep in this method; the caller may hold spinlocks. + + + (*) void (*describe)(const struct key *key, struct seq_file *p); + + This method is optional. It is called during /proc/keys reading to + summarise a key's description and payload in text form. + + This method will be called with the RCU read lock held. rcu_dereference() + should be used to read the payload pointer if the payload is to be + accessed. key->datalen cannot be trusted to stay consistent with the + contents of the payload. + + The description will not change, though the key's state may. + + It is not safe to sleep in this method; the RCU read lock is held by the + caller. + + + (*) long (*read)(const struct key *key, char __user *buffer, size_t buflen); + + This method is optional. It is called by KEYCTL_READ to translate the + key's payload into something a blob of data for userspace to deal with. + Ideally, the blob should be in the same format as that passed in to the + instantiate and update methods. + + If successful, the blob size that could be produced should be returned + rather than the size copied. + + This method will be called with the key's semaphore read-locked. This will + prevent the key's payload changing. It is not necessary to use RCU locking + when accessing the key's payload. It is safe to sleep in this method, such + as might happen when the userspace buffer is accessed. + + + (*) int (*request_key)(struct key_construction *cons, const char *op, + void *aux); + + This method is optional. If provided, request_key() and friends will + invoke this function rather than upcalling to /sbin/request-key to operate + upon a key of this type. + + The aux parameter is as passed to request_key_async_with_auxdata() and + similar or is NULL otherwise. Also passed are the construction record for + the key to be operated upon and the operation type (currently only + "create"). + + This method is permitted to return before the upcall is complete, but the + following function must be called under all circumstances to complete the + instantiation process, whether or not it succeeds, whether or not there's + an error: + + void complete_request_key(struct key_construction *cons, int error); + + The error parameter should be 0 on success, -ve on error. The + construction record is destroyed by this action and the authorisation key + will be revoked. If an error is indicated, the key under construction + will be negatively instantiated if it wasn't already instantiated. + + If this method returns an error, that error will be returned to the + caller of request_key*(). complete_request_key() must be called prior to + returning. + + The key under construction and the authorisation key can be found in the + key_construction struct pointed to by cons: + + (*) struct key *key; + + The key under construction. + + (*) struct key *authkey; + + The authorisation key. + + +============================ +REQUEST-KEY CALLBACK SERVICE +============================ + +To create a new key, the kernel will attempt to execute the following command +line: + + /sbin/request-key create \ + + + is the key being constructed, and the three keyrings are the process +keyrings from the process that caused the search to be issued. These are +included for two reasons: + + (1) There may be an authentication token in one of the keyrings that is + required to obtain the key, eg: a Kerberos Ticket-Granting Ticket. + + (2) The new key should probably be cached in one of these rings. + +This program should set it UID and GID to those specified before attempting to +access any more keys. It may then look around for a user specific process to +hand the request off to (perhaps a path held in placed in another key by, for +example, the KDE desktop manager). + +The program (or whatever it calls) should finish construction of the key by +calling KEYCTL_INSTANTIATE or KEYCTL_INSTANTIATE_IOV, which also permits it to +cache the key in one of the keyrings (probably the session ring) before +returning. Alternatively, the key can be marked as negative with KEYCTL_NEGATE +or KEYCTL_REJECT; this also permits the key to be cached in one of the +keyrings. + +If it returns with the key remaining in the unconstructed state, the key will +be marked as being negative, it will be added to the session keyring, and an +error will be returned to the key requestor. + +Supplementary information may be provided from whoever or whatever invoked this +service. This will be passed as the parameter. If no such +information was made available, then "-" will be passed as this parameter +instead. + + +Similarly, the kernel may attempt to update an expired or a soon to expire key +by executing: + + /sbin/request-key update \ + + +In this case, the program isn't required to actually attach the key to a ring; +the rings are provided for reference. + + +================== +GARBAGE COLLECTION +================== + +Dead keys (for which the type has been removed) will be automatically unlinked +from those keyrings that point to them and deleted as soon as possible by a +background garbage collector. + +Similarly, revoked and expired keys will be garbage collected, but only after a +certain amount of time has passed. This time is set as a number of seconds in: + + /proc/sys/kernel/keys/gc_delay diff --git a/Documentation/tomoyo.txt b/Documentation/security/tomoyo.txt similarity index 100% rename from Documentation/tomoyo.txt rename to Documentation/security/tomoyo.txt diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt index 9822afb6313c..89757012c7ff 100644 --- a/Documentation/sound/alsa/ALSA-Configuration.txt +++ b/Documentation/sound/alsa/ALSA-Configuration.txt @@ -1230,6 +1230,13 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. This module supports multiple cards. The driver requires the firmware loader support on kernel. + Module snd-lola + --------------- + + Module for Digigram Lola PCI-e boards + + This module supports multiple cards. + Module snd-lx6464es ------------------- diff --git a/Documentation/sound/alsa/HD-Audio-Models.txt b/Documentation/sound/alsa/HD-Audio-Models.txt index 0caf77e59be4..d70c93bdcadf 100644 --- a/Documentation/sound/alsa/HD-Audio-Models.txt +++ b/Documentation/sound/alsa/HD-Audio-Models.txt @@ -94,7 +94,7 @@ ALC662/663/272 3stack-dig 3-stack (2-channel) with SPDIF 3stack-6ch 3-stack (6-channel) 3stack-6ch-dig 3-stack (6-channel) with SPDIF - 6stack-dig 6-stack with SPDIF + 5stack-dig 5-stack with SPDIF lenovo-101e Lenovo laptop eeepc-p701 ASUS Eeepc P701 eeepc-ep20 ASUS Eeepc EP20 diff --git a/Documentation/sound/alsa/SB-Live-mixer.txt b/Documentation/sound/alsa/SB-Live-mixer.txt index f5639d40521d..f4b5988f450c 100644 --- a/Documentation/sound/alsa/SB-Live-mixer.txt +++ b/Documentation/sound/alsa/SB-Live-mixer.txt @@ -87,14 +87,14 @@ accumulator. ALSA uses accumulators 0 and 1 for left and right PCM. The result is forwarded to the ADC capture FIFO (thus to the standard capture PCM device). -name='Music Playback Volume',index=0 +name='Synth Playback Volume',index=0 This control is used to attenuate samples for left and right MIDI FX-bus accumulators. ALSA uses accumulators 4 and 5 for left and right MIDI samples. The result samples are forwarded to the front DAC PCM slots of the AC97 codec. -name='Music Capture Volume',index=0 -name='Music Capture Switch',index=0 +name='Synth Capture Volume',index=0 +name='Synth Capture Switch',index=0 These controls are used to attenuate samples for left and right MIDI FX-bus accumulator. ALSA uses accumulators 4 and 5 for left and right PCM. diff --git a/Documentation/stable_api_nonsense.txt b/Documentation/stable_api_nonsense.txt index 847b342b7b20..db3be892afb2 100644 --- a/Documentation/stable_api_nonsense.txt +++ b/Documentation/stable_api_nonsense.txt @@ -122,7 +122,7 @@ operating system to suffer. In both of these instances, all developers agreed that these were important changes that needed to be made, and they were made, with -relatively little pain. If Linux had to ensure that it preserve a +relatively little pain. If Linux had to ensure that it will preserve a stable source interface, a new interface would have been created, and the older, broken one would have had to be maintained over time, leading to extra work for the USB developers. Since all Linux USB developers do diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt index 4af0614147ef..88fd7f5c8dcd 100644 --- a/Documentation/sysctl/fs.txt +++ b/Documentation/sysctl/fs.txt @@ -231,13 +231,6 @@ its creation). This directory contains configuration options for the epoll(7) interface. -max_user_instances ------------------- - -This is the maximum number of epoll file descriptors that a single user can -have open at a given time. The default value is 128, and should be enough -for normal users. - max_user_watches ---------------- diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 36f007514db3..5e7cb39ad195 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -161,7 +161,8 @@ core_pattern is used to specify a core dumpfile pattern name. %s signal number %t UNIX time of dump %h hostname - %e executable filename + %e executable filename (may be shortened) + %E executable path % both are dropped . If the first character of the pattern is a '|', the kernel will treat the rest of the pattern as a command to run. The core dump will be diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt index cbd05ffc606b..3201a7097e4d 100644 --- a/Documentation/sysctl/net.txt +++ b/Documentation/sysctl/net.txt @@ -32,6 +32,17 @@ Table : Subdirectories in /proc/sys/net 1. /proc/sys/net/core - Network core options ------------------------------------------------------- +bpf_jit_enable +-------------- + +This enables Berkeley Packet Filter Just in Time compiler. +Currently supported on x86_64 architecture, bpf_jit provides a framework +to speed packet filtering, the one used by tcpdump/libpcap for example. +Values : + 0 - disable the JIT (default value) + 1 - enable the JIT + 2 - enable the JIT and ask the compiler to emit traces on kernel log. + rmem_default ------------ diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 30289fab86eb..96f0ee825bed 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -481,10 +481,10 @@ the DMA zone. Type(A) is called as "Node" order. Type (B) is "Zone" order. "Node order" orders the zonelists by node, then by zone within each node. -Specify "[Nn]ode" for zone order +Specify "[Nn]ode" for node order "Zone Order" orders the zonelists by zone type, then by node within each -zone. Specify "[Zz]one"for zode order. +zone. Specify "[Zz]one" for zone order. Specify "[Dd]efault" to request automatic configuration. Autoconfiguration will select "node" order in following case. diff --git a/Documentation/timers/timers-howto.txt b/Documentation/timers/timers-howto.txt index c9ef29d2ede3..038f8c77a076 100644 --- a/Documentation/timers/timers-howto.txt +++ b/Documentation/timers/timers-howto.txt @@ -24,7 +24,7 @@ ATOMIC CONTEXT: ndelay(unsigned long nsecs) udelay(unsigned long usecs) - mdelay(unsgined long msecs) + mdelay(unsigned long msecs) udelay is the generally preferred API; ndelay-level precision may not actually exist on many non-PC devices. diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt index 6d27ab8d6e9f..c83bd6b4e6e8 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.txt @@ -120,7 +120,6 @@ format: field:unsigned char common_flags; offset:2; size:1; signed:0; field:unsigned char common_preempt_count; offset:3; size:1;signed:0; field:int common_pid; offset:4; size:4; signed:1; - field:int common_lock_depth; offset:8; size:4; signed:1; field:unsigned long __probe_ip; offset:12; size:4; signed:0; field:int __probe_nargs; offset:16; size:4; signed:1; diff --git a/Documentation/uml/UserModeLinux-HOWTO.txt b/Documentation/uml/UserModeLinux-HOWTO.txt deleted file mode 100644 index 9b7e1904db1c..000000000000 --- a/Documentation/uml/UserModeLinux-HOWTO.txt +++ /dev/null @@ -1,4579 +0,0 @@ - User Mode Linux HOWTO - User Mode Linux Core Team - Mon Nov 18 14:16:16 EST 2002 - - This document describes the use and abuse of Jeff Dike's User Mode - Linux: a port of the Linux kernel as a normal Intel Linux process. - ______________________________________________________________________ - - Table of Contents - - 1. Introduction - - 1.1 How is User Mode Linux Different? - 1.2 Why Would I Want User Mode Linux? - - 2. Compiling the kernel and modules - - 2.1 Compiling the kernel - 2.2 Compiling and installing kernel modules - 2.3 Compiling and installing uml_utilities - - 3. Running UML and logging in - - 3.1 Running UML - 3.2 Logging in - 3.3 Examples - - 4. UML on 2G/2G hosts - - 4.1 Introduction - 4.2 The problem - 4.3 The solution - - 5. Setting up serial lines and consoles - - 5.1 Specifying the device - 5.2 Specifying the channel - 5.3 Examples - - 6. Setting up the network - - 6.1 General setup - 6.2 Userspace daemons - 6.3 Specifying ethernet addresses - 6.4 UML interface setup - 6.5 Multicast - 6.6 TUN/TAP with the uml_net helper - 6.7 TUN/TAP with a preconfigured tap device - 6.8 Ethertap - 6.9 The switch daemon - 6.10 Slip - 6.11 Slirp - 6.12 pcap - 6.13 Setting up the host yourself - - 7. Sharing Filesystems between Virtual Machines - - 7.1 A warning - 7.2 Using layered block devices - 7.3 Note! - 7.4 Another warning - 7.5 uml_moo : Merging a COW file with its backing file - - 8. Creating filesystems - - 8.1 Create the filesystem file - 8.2 Assign the file to a UML device - 8.3 Creating and mounting the filesystem - - 9. Host file access - - 9.1 Using hostfs - 9.2 hostfs as the root filesystem - 9.3 Building hostfs - - 10. The Management Console - 10.1 version - 10.2 halt and reboot - 10.3 config - 10.4 remove - 10.5 sysrq - 10.6 help - 10.7 cad - 10.8 stop - 10.9 go - - 11. Kernel debugging - - 11.1 Starting the kernel under gdb - 11.2 Examining sleeping processes - 11.3 Running ddd on UML - 11.4 Debugging modules - 11.5 Attaching gdb to the kernel - 11.6 Using alternate debuggers - - 12. Kernel debugging examples - - 12.1 The case of the hung fsck - 12.2 Episode 2: The case of the hung fsck - - 13. What to do when UML doesn't work - - 13.1 Strange compilation errors when you build from source - 13.2 (obsolete) - 13.3 A variety of panics and hangs with /tmp on a reiserfs filesystem - 13.4 The compile fails with errors about conflicting types for 'open', 'dup', and 'waitpid' - 13.5 UML doesn't work when /tmp is an NFS filesystem - 13.6 UML hangs on boot when compiled with gprof support - 13.7 syslogd dies with a SIGTERM on startup - 13.8 TUN/TAP networking doesn't work on a 2.4 host - 13.9 You can network to the host but not to other machines on the net - 13.10 I have no root and I want to scream - 13.11 UML build conflict between ptrace.h and ucontext.h - 13.12 The UML BogoMips is exactly half the host's BogoMips - 13.13 When you run UML, it immediately segfaults - 13.14 xterms appear, then immediately disappear - 13.15 Any other panic, hang, or strange behavior - - 14. Diagnosing Problems - - 14.1 Case 1 : Normal kernel panics - 14.2 Case 2 : Tracing thread panics - 14.3 Case 3 : Tracing thread panics caused by other threads - 14.4 Case 4 : Hangs - - 15. Thanks - - 15.1 Code and Documentation - 15.2 Flushing out bugs - 15.3 Buglets and clean-ups - 15.4 Case Studies - 15.5 Other contributions - - - ______________________________________________________________________ - - 11.. IInnttrroodduuccttiioonn - - Welcome to User Mode Linux. It's going to be fun. - - - - 11..11.. HHooww iiss UUsseerr MMooddee LLiinnuuxx DDiiffffeerreenntt?? - - Normally, the Linux Kernel talks straight to your hardware (video - card, keyboard, hard drives, etc), and any programs which run ask the - kernel to operate the hardware, like so: - - - - +-----------+-----------+----+ - | Process 1 | Process 2 | ...| - +-----------+-----------+----+ - | Linux Kernel | - +----------------------------+ - | Hardware | - +----------------------------+ - - - - - The User Mode Linux Kernel is different; instead of talking to the - hardware, it talks to a `real' Linux kernel (called the `host kernel' - from now on), like any other program. Programs can then run inside - User-Mode Linux as if they were running under a normal kernel, like - so: - - - - +----------------+ - | Process 2 | ...| - +-----------+----------------+ - | Process 1 | User-Mode Linux| - +----------------------------+ - | Linux Kernel | - +----------------------------+ - | Hardware | - +----------------------------+ - - - - - - 11..22.. WWhhyy WWoouulldd II WWaanntt UUsseerr MMooddee LLiinnuuxx?? - - - 1. If User Mode Linux crashes, your host kernel is still fine. - - 2. You can run a usermode kernel as a non-root user. - - 3. You can debug the User Mode Linux like any normal process. - - 4. You can run gprof (profiling) and gcov (coverage testing). - - 5. You can play with your kernel without breaking things. - - 6. You can use it as a sandbox for testing new apps. - - 7. You can try new development kernels safely. - - 8. You can run different distributions simultaneously. - - 9. It's extremely fun. - - - - - - 22.. CCoommppiilliinngg tthhee kkeerrnneell aanndd mmoodduulleess - - - - - 22..11.. CCoommppiilliinngg tthhee kkeerrnneell - - - Compiling the user mode kernel is just like compiling any other - kernel. Let's go through the steps, using 2.4.0-prerelease (current - as of this writing) as an example: - - - 1. Download the latest UML patch from - - the download page - . - - - 3. Make a directory and unpack the kernel into it. - - - - host% - mkdir ~/uml - - - - - - - host% - cd ~/uml - - - - - - - host% - tar -xzvf linux-2.4.0-prerelease.tar.bz2 - - - - - - - 4. Apply the patch using - - - - host% - cd ~/uml/linux - - - - host% - bzcat uml-patch-2.4.0-prerelease.bz2 | patch -p1 - - - - - - - 5. Run your favorite config; `make xconfig ARCH=um' is the most - convenient. `make config ARCH=um' and 'make menuconfig ARCH=um' - will work as well. The defaults will give you a useful kernel. If - you want to change something, go ahead, it probably won't hurt - anything. - - - Note: If the host is configured with a 2G/2G address space split - rather than the usual 3G/1G split, then the packaged UML binaries - will not run. They will immediately segfault. See ``UML on 2G/2G - hosts'' for the scoop on running UML on your system. - - - - 6. Finish with `make linux ARCH=um': the result is a file called - `linux' in the top directory of your source tree. - - Make sure that you don't build this kernel in /usr/src/linux. On some - distributions, /usr/include/asm is a link into this pool. The user- - mode build changes the other end of that link, and things that include - stop compiling. - - The sources are also available from cvs at the project's cvs page, - which has directions on getting the sources. You can also browse the - CVS pool from there. - - If you get the CVS sources, you will have to check them out into an - empty directory. You will then have to copy each file into the - corresponding directory in the appropriate kernel pool. - - If you don't have the latest kernel pool, you can get the - corresponding user-mode sources with - - - host% cvs co -r v_2_3_x linux - - - - - where 'x' is the version in your pool. Note that you will not get the - bug fixes and enhancements that have gone into subsequent releases. - - - 22..22.. CCoommppiilliinngg aanndd iinnssttaalllliinngg kkeerrnneell mmoodduulleess - - UML modules are built in the same way as the native kernel (with the - exception of the 'ARCH=um' that you always need for UML): - - - host% make modules ARCH=um - - - - - Any modules that you want to load into this kernel need to be built in - the user-mode pool. Modules from the native kernel won't work. - - You can install them by using ftp or something to copy them into the - virtual machine and dropping them into /lib/modules/`uname -r`. - - You can also get the kernel build process to install them as follows: - - 1. with the kernel not booted, mount the root filesystem in the top - level of the kernel pool: - - - host% mount root_fs mnt -o loop - - - - - - - 2. run - - - host% - make modules_install INSTALL_MOD_PATH=`pwd`/mnt ARCH=um - - - - - - - 3. unmount the filesystem - - - host% umount mnt - - - - - - - 4. boot the kernel on it - - - When the system is booted, you can use insmod as usual to get the - modules into the kernel. A number of things have been loaded into UML - as modules, especially filesystems and network protocols and filters, - so most symbols which need to be exported probably already are. - However, if you do find symbols that need exporting, let us - know, and - they'll be "taken care of". - - - - 22..33.. CCoommppiilliinngg aanndd iinnssttaalllliinngg uummll__uuttiilliittiieess - - Many features of the UML kernel require a user-space helper program, - so a uml_utilities package is distributed separately from the kernel - patch which provides these helpers. Included within this is: - - +o port-helper - Used by consoles which connect to xterms or ports - - +o tunctl - Configuration tool to create and delete tap devices - - +o uml_net - Setuid binary for automatic tap device configuration - - +o uml_switch - User-space virtual switch required for daemon - transport - - The uml_utilities tree is compiled with: - - - host# - make && make install - - - - - Note that UML kernel patches may require a specific version of the - uml_utilities distribution. If you don't keep up with the mailing - lists, ensure that you have the latest release of uml_utilities if you - are experiencing problems with your UML kernel, particularly when - dealing with consoles or command-line switches to the helper programs - - - - - - - - - 33.. RRuunnnniinngg UUMMLL aanndd llooggggiinngg iinn - - - - 33..11.. RRuunnnniinngg UUMMLL - - It runs on 2.2.15 or later, and all 2.4 kernels. - - - Booting UML is straightforward. Simply run 'linux': it will try to - mount the file `root_fs' in the current directory. You do not need to - run it as root. If your root filesystem is not named `root_fs', then - you need to put a `ubd0=root_fs_whatever' switch on the linux command - line. - - - You will need a filesystem to boot UML from. There are a number - available for download from here . There are also several tools - which can be - used to generate UML-compatible filesystem images from media. - The kernel will boot up and present you with a login prompt. - - - Note: If the host is configured with a 2G/2G address space split - rather than the usual 3G/1G split, then the packaged UML binaries will - not run. They will immediately segfault. See ``UML on 2G/2G hosts'' - for the scoop on running UML on your system. - - - - 33..22.. LLooggggiinngg iinn - - - - The prepackaged filesystems have a root account with password 'root' - and a user account with password 'user'. The login banner will - generally tell you how to log in. So, you log in and you will find - yourself inside a little virtual machine. Our filesystems have a - variety of commands and utilities installed (and it is fairly easy to - add more), so you will have a lot of tools with which to poke around - the system. - - There are a couple of other ways to log in: - - +o On a virtual console - - - - Each virtual console that is configured (i.e. the device exists in - /dev and /etc/inittab runs a getty on it) will come up in its own - xterm. If you get tired of the xterms, read ``Setting up serial - lines and consoles'' to see how to attach the consoles to - something else, like host ptys. - - - - +o Over the serial line - - - In the boot output, find a line that looks like: - - - - serial line 0 assigned pty /dev/ptyp1 - - - - - Attach your favorite terminal program to the corresponding tty. I.e. - for minicom, the command would be - - - host% minicom -o -p /dev/ttyp1 - - - - - - - +o Over the net - - - If the network is running, then you can telnet to the virtual - machine and log in to it. See ``Setting up the network'' to learn - about setting up a virtual network. - - When you're done using it, run halt, and the kernel will bring itself - down and the process will exit. - - - 33..33.. EExxaammpplleess - - Here are some examples of UML in action: - - +o A login session - - +o A virtual network - - - - - - - - 44.. UUMMLL oonn 22GG//22GG hhoossttss - - - - - 44..11.. IInnttrroodduuccttiioonn - - - Most Linux machines are configured so that the kernel occupies the - upper 1G (0xc0000000 - 0xffffffff) of the 4G address space and - processes use the lower 3G (0x00000000 - 0xbfffffff). However, some - machine are configured with a 2G/2G split, with the kernel occupying - the upper 2G (0x80000000 - 0xffffffff) and processes using the lower - 2G (0x00000000 - 0x7fffffff). - - - - - 44..22.. TThhee pprroobblleemm - - - The prebuilt UML binaries on this site will not run on 2G/2G hosts - because UML occupies the upper .5G of the 3G process address space - (0xa0000000 - 0xbfffffff). Obviously, on 2G/2G hosts, this is right - in the middle of the kernel address space, so UML won't even load - it - will immediately segfault. - - - - - 44..33.. TThhee ssoolluuttiioonn - - - The fix for this is to rebuild UML from source after enabling - CONFIG_HOST_2G_2G (under 'General Setup'). This will cause UML to - load itself in the top .5G of that smaller process address space, - where it will run fine. See ``Compiling the kernel and modules'' if - you need help building UML from source. - - - - - - - - - - - 55.. SSeettttiinngg uupp sseerriiaall lliinneess aanndd ccoonnssoolleess - - - It is possible to attach UML serial lines and consoles to many types - of host I/O channels by specifying them on the command line. - - - You can attach them to host ptys, ttys, file descriptors, and ports. - This allows you to do things like - - +o have a UML console appear on an unused host console, - - +o hook two virtual machines together by having one attach to a pty - and having the other attach to the corresponding tty - - +o make a virtual machine accessible from the net by attaching a - console to a port on the host. - - - The general format of the command line option is device=channel. - - - - 55..11.. SSppeecciiffyyiinngg tthhee ddeevviiccee - - Devices are specified with "con" or "ssl" (console or serial line, - respectively), optionally with a device number if you are talking - about a specific device. - - - Using just "con" or "ssl" describes all of the consoles or serial - lines. If you want to talk about console #3 or serial line #10, they - would be "con3" and "ssl10", respectively. - - - A specific device name will override a less general "con=" or "ssl=". - So, for example, you can assign a pty to each of the serial lines - except for the first two like this: - - - ssl=pty ssl0=tty:/dev/tty0 ssl1=tty:/dev/tty1 - - - - - The specificity of the device name is all that matters; order on the - command line is irrelevant. - - - - 55..22.. SSppeecciiffyyiinngg tthhee cchhaannnneell - - There are a number of different types of channels to attach a UML - device to, each with a different way of specifying exactly what to - attach to. - - +o pseudo-terminals - device=pty pts terminals - device=pts - - - This will cause UML to allocate a free host pseudo-terminal for the - device. The terminal that it got will be announced in the boot - log. You access it by attaching a terminal program to the - corresponding tty: - - +o screen /dev/pts/n - - +o screen /dev/ttyxx - - +o minicom -o -p /dev/ttyxx - minicom seems not able to handle pts - devices - - +o kermit - start it up, 'open' the device, then 'connect' - - - - - - +o terminals - device=tty:tty device file - - - This will make UML attach the device to the specified tty (i.e - - - con1=tty:/dev/tty3 - - - - - will attach UML's console 1 to the host's /dev/tty3). If the tty that - you specify is the slave end of a tty/pty pair, something else must - have already opened the corresponding pty in order for this to work. - - - - - - +o xterms - device=xterm - - - UML will run an xterm and the device will be attached to it. - - - - - - +o Port - device=port:port number - - - This will attach the UML devices to the specified host port. - Attaching console 1 to the host's port 9000 would be done like - this: - - - con1=port:9000 - - - - - Attaching all the serial lines to that port would be done similarly: - - - ssl=port:9000 - - - - - You access these devices by telnetting to that port. Each active tel- - net session gets a different device. If there are more telnets to a - port than UML devices attached to it, then the extra telnet sessions - will block until an existing telnet detaches, or until another device - becomes active (i.e. by being activated in /etc/inittab). - - This channel has the advantage that you can both attach multiple UML - devices to it and know how to access them without reading the UML boot - log. It is also unique in allowing access to a UML from remote - machines without requiring that the UML be networked. This could be - useful in allowing public access to UMLs because they would be - accessible from the net, but wouldn't need any kind of network - filtering or access control because they would have no network access. - - - If you attach the main console to a portal, then the UML boot will - appear to hang. In reality, it's waiting for a telnet to connect, at - which point the boot will proceed. - - - - - - +o already-existing file descriptors - device=file descriptor - - - If you set up a file descriptor on the UML command line, you can - attach a UML device to it. This is most commonly used to put the - main console back on stdin and stdout after assigning all the other - consoles to something else: - - - con0=fd:0,fd:1 con=pts - - - - - - - - - +o Nothing - device=null - - - This allows the device to be opened, in contrast to 'none', but - reads will block, and writes will succeed and the data will be - thrown out. - - - - - - +o None - device=none - - - This causes the device to disappear. - - - - You can also specify different input and output channels for a device - by putting a comma between them: - - - ssl3=tty:/dev/tty2,xterm - - - - - will cause serial line 3 to accept input on the host's /dev/tty3 and - display output on an xterm. That's a silly example - the most common - use of this syntax is to reattach the main console to stdin and stdout - as shown above. - - - If you decide to move the main console away from stdin/stdout, the - initial boot output will appear in the terminal that you're running - UML in. However, once the console driver has been officially - initialized, then the boot output will start appearing wherever you - specified that console 0 should be. That device will receive all - subsequent output. - - - - 55..33.. EExxaammpplleess - - There are a number of interesting things you can do with this - capability. - - - First, this is how you get rid of those bleeding console xterms by - attaching them to host ptys: - - - con=pty con0=fd:0,fd:1 - - - - - This will make a UML console take over an unused host virtual console, - so that when you switch to it, you will see the UML login prompt - rather than the host login prompt: - - - con1=tty:/dev/tty6 - - - - - You can attach two virtual machines together with what amounts to a - serial line as follows: - - Run one UML with a serial line attached to a pty - - - - ssl1=pty - - - - - Look at the boot log to see what pty it got (this example will assume - that it got /dev/ptyp1). - - Boot the other UML with a serial line attached to the corresponding - tty - - - - ssl1=tty:/dev/ttyp1 - - - - - Log in, make sure that it has no getty on that serial line, attach a - terminal program like minicom to it, and you should see the login - prompt of the other virtual machine. - - - 66.. SSeettttiinngg uupp tthhee nneettwwoorrkk - - - - This page describes how to set up the various transports and to - provide a UML instance with network access to the host, other machines - on the local net, and the rest of the net. - - - As of 2.4.5, UML networking has been completely redone to make it much - easier to set up, fix bugs, and add new features. - - - There is a new helper, uml_net, which does the host setup that - requires root privileges. - - - There are currently five transport types available for a UML virtual - machine to exchange packets with other hosts: - - +o ethertap - - +o TUN/TAP - - +o Multicast - - +o a switch daemon - - +o slip - - +o slirp - - +o pcap - - The TUN/TAP, ethertap, slip, and slirp transports allow a UML - instance to exchange packets with the host. They may be directed - to the host or the host may just act as a router to provide access - to other physical or virtual machines. - - - The pcap transport is a synthetic read-only interface, using the - libpcap binary to collect packets from interfaces on the host and - filter them. This is useful for building preconfigured traffic - monitors or sniffers. - - - The daemon and multicast transports provide a completely virtual - network to other virtual machines. This network is completely - disconnected from the physical network unless one of the virtual - machines on it is acting as a gateway. - - - With so many host transports, which one should you use? Here's when - you should use each one: - - +o ethertap - if you want access to the host networking and it is - running 2.2 - - +o TUN/TAP - if you want access to the host networking and it is - running 2.4. Also, the TUN/TAP transport is able to use a - preconfigured device, allowing it to avoid using the setuid uml_net - helper, which is a security advantage. - - +o Multicast - if you want a purely virtual network and you don't want - to set up anything but the UML - - +o a switch daemon - if you want a purely virtual network and you - don't mind running the daemon in order to get somewhat better - performance - - +o slip - there is no particular reason to run the slip backend unless - ethertap and TUN/TAP are just not available for some reason - - +o slirp - if you don't have root access on the host to setup - networking, or if you don't want to allocate an IP to your UML - - +o pcap - not much use for actual network connectivity, but great for - monitoring traffic on the host - - Ethertap is available on 2.4 and works fine. TUN/TAP is preferred - to it because it has better performance and ethertap is officially - considered obsolete in 2.4. Also, the root helper only needs to - run occasionally for TUN/TAP, rather than handling every packet, as - it does with ethertap. This is a slight security advantage since - it provides fewer opportunities for a nasty UML user to somehow - exploit the helper's root privileges. - - - 66..11.. GGeenneerraall sseettuupp - - First, you must have the virtual network enabled in your UML. If are - running a prebuilt kernel from this site, everything is already - enabled. If you build the kernel yourself, under the "Network device - support" menu, enable "Network device support", and then the three - transports. - - - The next step is to provide a network device to the virtual machine. - This is done by describing it on the kernel command line. - - The general format is - - - eth = , - - - - - For example, a virtual ethernet device may be attached to a host - ethertap device as follows: - - - eth0=ethertap,tap0,fe:fd:0:0:0:1,192.168.0.254 - - - - - This sets up eth0 inside the virtual machine to attach itself to the - host /dev/tap0, assigns it an ethernet address, and assigns the host - tap0 interface an IP address. - - - - Note that the IP address you assign to the host end of the tap device - must be different than the IP you assign to the eth device inside UML. - If you are short on IPs and don't want to consume two per UML, then - you can reuse the host's eth IP address for the host ends of the tap - devices. Internally, the UMLs must still get unique IPs for their eth - devices. You can also give the UMLs non-routable IPs (192.168.x.x or - 10.x.x.x) and have the host masquerade them. This will let outgoing - connections work, but incoming connections won't without more work, - such as port forwarding from the host. - Also note that when you configure the host side of an interface, it is - only acting as a gateway. It will respond to pings sent to it - locally, but is not useful to do that since it's a host interface. - You are not talking to the UML when you ping that interface and get a - response. - - - You can also add devices to a UML and remove them at runtime. See the - ``The Management Console'' page for details. - - - The sections below describe this in more detail. - - - Once you've decided how you're going to set up the devices, you boot - UML, log in, configure the UML side of the devices, and set up routes - to the outside world. At that point, you will be able to talk to any - other machines, physical or virtual, on the net. - - - If ifconfig inside UML fails and the network refuses to come up, run - tell you what went wrong. - - - - 66..22.. UUsseerrssppaaccee ddaaeemmoonnss - - You will likely need the setuid helper, or the switch daemon, or both. - They are both installed with the RPM and deb, so if you've installed - either, you can skip the rest of this section. - - - If not, then you need to check them out of CVS, build them, and - install them. The helper is uml_net, in CVS /tools/uml_net, and the - daemon is uml_switch, in CVS /tools/uml_router. They are both built - with a plain 'make'. Both need to be installed in a directory that's - in your path - /usr/bin is recommend. On top of that, uml_net needs - to be setuid root. - - - - 66..33.. SSppeecciiffyyiinngg eetthheerrnneett aaddddrreesssseess - - Below, you will see that the TUN/TAP, ethertap, and daemon interfaces - allow you to specify hardware addresses for the virtual ethernet - devices. This is generally not necessary. If you don't have a - specific reason to do it, you probably shouldn't. If one is not - specified on the command line, the driver will assign one based on the - device IP address. It will provide the address fe:fd:nn:nn:nn:nn - where nn.nn.nn.nn is the device IP address. This is nearly always - sufficient to guarantee a unique hardware address for the device. A - couple of exceptions are: - - +o Another set of virtual ethernet devices are on the same network and - they are assigned hardware addresses using a different scheme which - may conflict with the UML IP address-based scheme - - +o You aren't going to use the device for IP networking, so you don't - assign the device an IP address - - If you let the driver provide the hardware address, you should make - sure that the device IP address is known before the interface is - brought up. So, inside UML, this will guarantee that: - - - - UML# - ifconfig eth0 192.168.0.250 up - - - - - If you decide to assign the hardware address yourself, make sure that - the first byte of the address is even. Addresses with an odd first - byte are broadcast addresses, which you don't want assigned to a - device. - - - - 66..44.. UUMMLL iinntteerrffaaccee sseettuupp - - Once the network devices have been described on the command line, you - should boot UML and log in. - - - The first thing to do is bring the interface up: - - - UML# ifconfig ethn ip-address up - - - - - You should be able to ping the host at this point. - - - To reach the rest of the world, you should set a default route to the - host: - - - UML# route add default gw host ip - - - - - Again, with host ip of 192.168.0.4: - - - UML# route add default gw 192.168.0.4 - - - - - This page used to recommend setting a network route to your local net. - This is wrong, because it will cause UML to try to figure out hardware - addresses of the local machines by arping on the interface to the - host. Since that interface is basically a single strand of ethernet - with two nodes on it (UML and the host) and arp requests don't cross - networks, they will fail to elicit any responses. So, what you want - is for UML to just blindly throw all packets at the host and let it - figure out what to do with them, which is what leaving out the network - route and adding the default route does. - - - Note: If you can't communicate with other hosts on your physical - ethernet, it's probably because of a network route that's - automatically set up. If you run 'route -n' and see a route that - looks like this: - - - - - Destination Gateway Genmask Flags Metric Ref Use Iface - 192.168.0.0 0.0.0.0 255.255.255.0 U 0 0 0 eth0 - - - - - with a mask that's not 255.255.255.255, then replace it with a route - to your host: - - - UML# - route del -net 192.168.0.0 dev eth0 netmask 255.255.255.0 - - - - - - - UML# - route add -host 192.168.0.4 dev eth0 - - - - - This, plus the default route to the host, will allow UML to exchange - packets with any machine on your ethernet. - - - - 66..55.. MMuullttiiccaasstt - - The simplest way to set up a virtual network between multiple UMLs is - to use the mcast transport. This was written by Harald Welte and is - present in UML version 2.4.5-5um and later. Your system must have - multicast enabled in the kernel and there must be a multicast-capable - network device on the host. Normally, this is eth0, but if there is - no ethernet card on the host, then you will likely get strange error - messages when you bring the device up inside UML. - - - To use it, run two UMLs with - - - eth0=mcast - - - - - on their command lines. Log in, configure the ethernet device in each - machine with different IP addresses: - - - UML1# ifconfig eth0 192.168.0.254 - - - - - - - UML2# ifconfig eth0 192.168.0.253 - - - - - and they should be able to talk to each other. - - The full set of command line options for this transport are - - - - ethn=mcast,ethernet address,multicast - address,multicast port,ttl - - - - - Harald's original README is here and explains these in detail, as well as - some other issues. - - - - 66..66.. TTUUNN//TTAAPP wwiitthh tthhee uummll__nneett hheellppeerr - - TUN/TAP is the preferred mechanism on 2.4 to exchange packets with the - host. The TUN/TAP backend has been in UML since 2.4.9-3um. - - - The easiest way to get up and running is to let the setuid uml_net - helper do the host setup for you. This involves insmod-ing the tun.o - module if necessary, configuring the device, and setting up IP - forwarding, routing, and proxy arp. If you are new to UML networking, - do this first. If you're concerned about the security implications of - the setuid helper, use it to get up and running, then read the next - section to see how to have UML use a preconfigured tap device, which - avoids the use of uml_net. - - - If you specify an IP address for the host side of the device, the - uml_net helper will do all necessary setup on the host - the only - requirement is that TUN/TAP be available, either built in to the host - kernel or as the tun.o module. - - The format of the command line switch to attach a device to a TUN/TAP - device is - - - eth =tuntap,,, - - - - - For example, this argument will attach the UML's eth0 to the next - available tap device and assign an ethernet address to it based on its - IP address - - - eth0=tuntap,,,192.168.0.254 - - - - - - - Note that the IP address that must be used for the eth device inside - UML is fixed by the routing and proxy arp that is set up on the - TUN/TAP device on the host. You can use a different one, but it won't - work because reply packets won't reach the UML. This is a feature. - It prevents a nasty UML user from doing things like setting the UML IP - to the same as the network's nameserver or mail server. - - - There are a couple potential problems with running the TUN/TAP - transport on a 2.4 host kernel - - +o TUN/TAP seems not to work on 2.4.3 and earlier. Upgrade the host - kernel or use the ethertap transport. - - +o With an upgraded kernel, TUN/TAP may fail with - - - File descriptor in bad state - - - - - This is due to a header mismatch between the upgraded kernel and the - kernel that was originally installed on the machine. The fix is to - make sure that /usr/src/linux points to the headers for the running - kernel. - - These were pointed out by Tim Robinson in - name="this uml- - user post"> . - - - - 66..77.. TTUUNN//TTAAPP wwiitthh aa pprreeccoonnffiigguurreedd ttaapp ddeevviiccee - - If you prefer not to have UML use uml_net (which is somewhat - insecure), with UML 2.4.17-11, you can set up a TUN/TAP device - beforehand. The setup needs to be done as root, but once that's done, - there is no need for root assistance. Setting up the device is done - as follows: - - +o Create the device with tunctl (available from the UML utilities - tarball) - - - - - host# tunctl -u uid - - - - - where uid is the user id or username that UML will be run as. This - will tell you what device was created. - - +o Configure the device IP (change IP addresses and device name to - suit) - - - - - host# ifconfig tap0 192.168.0.254 up - - - - - - +o Set up routing and arping if desired - this is my recipe, there are - other ways of doing the same thing - - - host# - bash -c 'echo 1 > /proc/sys/net/ipv4/ip_forward' - - host# - route add -host 192.168.0.253 dev tap0 - - - - - - - host# - bash -c 'echo 1 > /proc/sys/net/ipv4/conf/tap0/proxy_arp' - - - - - - - host# - arp -Ds 192.168.0.253 eth0 pub - - - - - Note that this must be done every time the host boots - this configu- - ration is not stored across host reboots. So, it's probably a good - idea to stick it in an rc file. An even better idea would be a little - utility which reads the information from a config file and sets up - devices at boot time. - - +o Rather than using up two IPs and ARPing for one of them, you can - also provide direct access to your LAN by the UML by using a - bridge. - - - host# - brctl addbr br0 - - - - - - - host# - ifconfig eth0 0.0.0.0 promisc up - - - - - - - host# - ifconfig tap0 0.0.0.0 promisc up - - - - - - - host# - ifconfig br0 192.168.0.1 netmask 255.255.255.0 up - - - - - - - - host# - brctl stp br0 off - - - - - - - host# - brctl setfd br0 1 - - - - - - - host# - brctl sethello br0 1 - - - - - - - host# - brctl addif br0 eth0 - - - - - - - host# - brctl addif br0 tap0 - - - - - Note that 'br0' should be setup using ifconfig with the existing IP - address of eth0, as eth0 no longer has its own IP. - - +o - - - Also, the /dev/net/tun device must be writable by the user running - UML in order for the UML to use the device that's been configured - for it. The simplest thing to do is - - - host# chmod 666 /dev/net/tun - - - - - Making it world-writable looks bad, but it seems not to be - exploitable as a security hole. However, it does allow anyone to cre- - ate useless tap devices (useless because they can't configure them), - which is a DOS attack. A somewhat more secure alternative would to be - to create a group containing all the users who have preconfigured tap - devices and chgrp /dev/net/tun to that group with mode 664 or 660. - - - +o Once the device is set up, run UML with 'eth0=tuntap,device name' - (i.e. 'eth0=tuntap,tap0') on the command line (or do it with the - mconsole config command). - - +o Bring the eth device up in UML and you're in business. - - If you don't want that tap device any more, you can make it non- - persistent with - - - host# tunctl -d tap device - - - - - Finally, tunctl has a -b (for brief mode) switch which causes it to - output only the name of the tap device it created. This makes it - suitable for capture by a script: - - - host# TAP=`tunctl -u 1000 -b` - - - - - - - 66..88.. EEtthheerrttaapp - - Ethertap is the general mechanism on 2.2 for userspace processes to - exchange packets with the kernel. - - - - To use this transport, you need to describe the virtual network device - on the UML command line. The general format for this is - - - eth =ethertap, , , - - - - - So, the previous example - - - eth0=ethertap,tap0,fe:fd:0:0:0:1,192.168.0.254 - - - - - attaches the UML eth0 device to the host /dev/tap0, assigns it the - ethernet address fe:fd:0:0:0:1, and assigns the IP address - 192.168.0.254 to the tap device. - - - - The tap device is mandatory, but the others are optional. If the - ethernet address is omitted, one will be assigned to it. - - - The presence of the tap IP address will cause the helper to run and do - whatever host setup is needed to allow the virtual machine to - communicate with the outside world. If you're not sure you know what - you're doing, this is the way to go. - - - If it is absent, then you must configure the tap device and whatever - arping and routing you will need on the host. However, even in this - case, the uml_net helper still needs to be in your path and it must be - setuid root if you're not running UML as root. This is because the - tap device doesn't support SIGIO, which UML needs in order to use - something as a source of input. So, the helper is used as a - convenient asynchronous IO thread. - - If you're using the uml_net helper, you can ignore the following host - setup - uml_net will do it for you. You just need to make sure you - have ethertap available, either built in to the host kernel or - available as a module. - - - If you want to set things up yourself, you need to make sure that the - appropriate /dev entry exists. If it doesn't, become root and create - it as follows: - - - mknod /dev/tap c 36 + 16 - - - - - For example, this is how to create /dev/tap0: - - - mknod /dev/tap0 c 36 0 + 16 - - - - - You also need to make sure that the host kernel has ethertap support. - If ethertap is enabled as a module, you apparently need to insmod - ethertap once for each ethertap device you want to enable. So, - - - host# - insmod ethertap - - - - - will give you the tap0 interface. To get the tap1 interface, you need - to run - - - host# - insmod ethertap unit=1 -o ethertap1 - - - - - - - - 66..99.. TThhee sswwiittcchh ddaaeemmoonn - - NNoottee: This is the daemon formerly known as uml_router, but which was - renamed so the network weenies of the world would stop growling at me. - - - The switch daemon, uml_switch, provides a mechanism for creating a - totally virtual network. By default, it provides no connection to the - host network (but see -tap, below). - - - The first thing you need to do is run the daemon. Running it with no - arguments will make it listen on a default pair of unix domain - sockets. - - - If you want it to listen on a different pair of sockets, use - - - -unix control socket data socket - - - - - - If you want it to act as a hub rather than a switch, use - - - -hub - - - - - - If you want the switch to be connected to host networking (allowing - the umls to get access to the outside world through the host), use - - - -tap tap0 - - - - - - Note that the tap device must be preconfigured (see "TUN/TAP with a - preconfigured tap device", above). If you're using a different tap - device than tap0, specify that instead of tap0. - - - uml_switch can be backgrounded as follows - - - host% - uml_switch [ options ] < /dev/null > /dev/null - - - - - The reason it doesn't background by default is that it listens to - stdin for EOF. When it sees that, it exits. - - - The general format of the kernel command line switch is - - - - ethn=daemon,ethernet address,socket - type,control socket,data socket - - - - - You can leave off everything except the 'daemon'. You only need to - specify the ethernet address if the one that will be assigned to it - isn't acceptable for some reason. The rest of the arguments describe - how to communicate with the daemon. You should only specify them if - you told the daemon to use different sockets than the default. So, if - you ran the daemon with no arguments, running the UML on the same - machine with - eth0=daemon - - - - - will cause the eth0 driver to attach itself to the daemon correctly. - - - - 66..1100.. SSlliipp - - Slip is another, less general, mechanism for a process to communicate - with the host networking. In contrast to the ethertap interface, - which exchanges ethernet frames with the host and can be used to - transport any higher-level protocol, it can only be used to transport - IP. - - - The general format of the command line switch is - - - - ethn=slip,slip IP - - - - - The slip IP argument is the IP address that will be assigned to the - host end of the slip device. If it is specified, the helper will run - and will set up the host so that the virtual machine can reach it and - the rest of the network. - - - There are some oddities with this interface that you should be aware - of. You should only specify one slip device on a given virtual - machine, and its name inside UML will be 'umn', not 'eth0' or whatever - you specified on the command line. These problems will be fixed at - some point. - - - - 66..1111.. SSlliirrpp - - slirp uses an external program, usually /usr/bin/slirp, to provide IP - only networking connectivity through the host. This is similar to IP - masquerading with a firewall, although the translation is performed in - user-space, rather than by the kernel. As slirp does not set up any - interfaces on the host, or changes routing, slirp does not require - root access or setuid binaries on the host. - - - The general format of the command line switch for slirp is: - - - - ethn=slirp,ethernet address,slirp path - - - - - The ethernet address is optional, as UML will set up the interface - with an ethernet address based upon the initial IP address of the - interface. The slirp path is generally /usr/bin/slirp, although it - will depend on distribution. - - - The slirp program can have a number of options passed to the command - line and we can't add them to the UML command line, as they will be - parsed incorrectly. Instead, a wrapper shell script can be written or - the options inserted into the /.slirprc file. More information on - all of the slirp options can be found in its man pages. - - - The eth0 interface on UML should be set up with the IP 10.2.0.15, - although you can use anything as long as it is not used by a network - you will be connecting to. The default route on UML should be set to - use - - - UML# - route add default dev eth0 - - - - - slirp provides a number of useful IP addresses which can be used by - UML, such as 10.0.2.3 which is an alias for the DNS server specified - in /etc/resolv.conf on the host or the IP given in the 'dns' option - for slirp. - - - Even with a baudrate setting higher than 115200, the slirp connection - is limited to 115200. If you need it to go faster, the slirp binary - needs to be compiled with FULL_BOLT defined in config.h. - - - - 66..1122.. ppccaapp - - The pcap transport is attached to a UML ethernet device on the command - line or with uml_mconsole with the following syntax: - - - - ethn=pcap,host interface,filter - expression,option1,option2 - - - - - The expression and options are optional. - - - The interface is whatever network device on the host you want to - sniff. The expression is a pcap filter expression, which is also what - tcpdump uses, so if you know how to specify tcpdump filters, you will - use the same expressions here. The options are up to two of - 'promisc', control whether pcap puts the host interface into - promiscuous mode. 'optimize' and 'nooptimize' control whether the pcap - expression optimizer is used. - - - Example: - - - - eth0=pcap,eth0,tcp - - eth1=pcap,eth0,!tcp - - - - will cause the UML eth0 to emit all tcp packets on the host eth0 and - the UML eth1 to emit all non-tcp packets on the host eth0. - - - - 66..1133.. SSeettttiinngg uupp tthhee hhoosstt yyoouurrsseellff - - If you don't specify an address for the host side of the ethertap or - slip device, UML won't do any setup on the host. So this is what is - needed to get things working (the examples use a host-side IP of - 192.168.0.251 and a UML-side IP of 192.168.0.250 - adjust to suit your - own network): - - +o The device needs to be configured with its IP address. Tap devices - are also configured with an mtu of 1484. Slip devices are - configured with a point-to-point address pointing at the UML ip - address. - - - host# ifconfig tap0 arp mtu 1484 192.168.0.251 up - - - - - - - host# - ifconfig sl0 192.168.0.251 pointopoint 192.168.0.250 up - - - - - - +o If a tap device is being set up, a route is set to the UML IP. - - - UML# route add -host 192.168.0.250 gw 192.168.0.251 - - - - - - +o To allow other hosts on your network to see the virtual machine, - proxy arp is set up for it. - - - host# arp -Ds 192.168.0.250 eth0 pub - - - - - - +o Finally, the host is set up to route packets. - - - host# echo 1 > /proc/sys/net/ipv4/ip_forward - - - - - - - - - - - 77.. SShhaarriinngg FFiilleessyysstteemmss bbeettwweeeenn VViirrttuuaall MMaacchhiinneess - - - - - 77..11.. AA wwaarrnniinngg - - Don't attempt to share filesystems simply by booting two UMLs from the - same file. That's the same thing as booting two physical machines - from a shared disk. It will result in filesystem corruption. - - - - 77..22.. UUssiinngg llaayyeerreedd bblloocckk ddeevviicceess - - The way to share a filesystem between two virtual machines is to use - the copy-on-write (COW) layering capability of the ubd block driver. - As of 2.4.6-2um, the driver supports layering a read-write private - device over a read-only shared device. A machine's writes are stored - in the private device, while reads come from either device - the - private one if the requested block is valid in it, the shared one if - not. Using this scheme, the majority of data which is unchanged is - shared between an arbitrary number of virtual machines, each of which - has a much smaller file containing the changes that it has made. With - a large number of UMLs booting from a large root filesystem, this - leads to a huge disk space saving. It will also help performance, - since the host will be able to cache the shared data using a much - smaller amount of memory, so UML disk requests will be served from the - host's memory rather than its disks. - - - - - To add a copy-on-write layer to an existing block device file, simply - add the name of the COW file to the appropriate ubd switch: - - - ubd0=root_fs_cow,root_fs_debian_22 - - - - - where 'root_fs_cow' is the private COW file and 'root_fs_debian_22' is - the existing shared filesystem. The COW file need not exist. If it - doesn't, the driver will create and initialize it. Once the COW file - has been initialized, it can be used on its own on the command line: - - - ubd0=root_fs_cow - - - - - The name of the backing file is stored in the COW file header, so it - would be redundant to continue specifying it on the command line. - - - - 77..33.. NNoottee!! - - When checking the size of the COW file in order to see the gobs of - space that you're saving, make sure you use 'ls -ls' to see the actual - disk consumption rather than the length of the file. The COW file is - sparse, so the length will be very different from the disk usage. - Here is a 'ls -l' of a COW file and backing file from one boot and - shutdown: - host% ls -l cow.debian debian2.2 - -rw-r--r-- 1 jdike jdike 492504064 Aug 6 21:16 cow.debian - -rwxrw-rw- 1 jdike jdike 537919488 Aug 6 20:42 debian2.2 - - - - - Doesn't look like much saved space, does it? Well, here's 'ls -ls': - - - host% ls -ls cow.debian debian2.2 - 880 -rw-r--r-- 1 jdike jdike 492504064 Aug 6 21:16 cow.debian - 525832 -rwxrw-rw- 1 jdike jdike 537919488 Aug 6 20:42 debian2.2 - - - - - Now, you can see that the COW file has less than a meg of disk, rather - than 492 meg. - - - - 77..44.. AAnnootthheerr wwaarrnniinngg - - Once a filesystem is being used as a readonly backing file for a COW - file, do not boot directly from it or modify it in any way. Doing so - will invalidate any COW files that are using it. The mtime and size - of the backing file are stored in the COW file header at its creation, - and they must continue to match. If they don't, the driver will - refuse to use the COW file. - - - - - If you attempt to evade this restriction by changing either the - backing file or the COW header by hand, you will get a corrupted - filesystem. - - - - - Among other things, this means that upgrading the distribution in a - backing file and expecting that all of the COW files using it will see - the upgrade will not work. - - - - - 77..55.. uummll__mmoooo :: MMeerrggiinngg aa CCOOWW ffiillee wwiitthh iittss bbaacckkiinngg ffiillee - - Depending on how you use UML and COW devices, it may be advisable to - merge the changes in the COW file into the backing file every once in - a while. - - - - - The utility that does this is uml_moo. Its usage is - - - host% uml_moo COW file new backing file - - - - - There's no need to specify the backing file since that information is - already in the COW file header. If you're paranoid, boot the new - merged file, and if you're happy with it, move it over the old backing - file. - - - - - uml_moo creates a new backing file by default as a safety measure. It - also has a destructive merge option which will merge the COW file - directly into its current backing file. This is really only usable - when the backing file only has one COW file associated with it. If - there are multiple COWs associated with a backing file, a -d merge of - one of them will invalidate all of the others. However, it is - convenient if you're short of disk space, and it should also be - noticeably faster than a non-destructive merge. - - - - - uml_moo is installed with the UML deb and RPM. If you didn't install - UML from one of those packages, you can also get it from the UML - utilities tar file in tools/moo. - - - - - - - - - 88.. CCrreeaattiinngg ffiilleessyysstteemmss - - - You may want to create and mount new UML filesystems, either because - your root filesystem isn't large enough or because you want to use a - filesystem other than ext2. - - - This was written on the occasion of reiserfs being included in the - 2.4.1 kernel pool, and therefore the 2.4.1 UML, so the examples will - talk about reiserfs. This information is generic, and the examples - should be easy to translate to the filesystem of your choice. - - - 88..11.. CCrreeaattee tthhee ffiilleessyysstteemm ffiillee - - dd is your friend. All you need to do is tell dd to create an empty - file of the appropriate size. I usually make it sparse to save time - and to avoid allocating disk space until it's actually used. For - example, the following command will create a sparse 100 meg file full - of zeroes. - - - host% - dd if=/dev/zero of=new_filesystem seek=100 count=1 bs=1M - - - - - - - 88..22.. AAssssiiggnn tthhee ffiillee ttoo aa UUMMLL ddeevviiccee - - Add an argument like the following to the UML command line: - - ubd4=new_filesystem - - - - - making sure that you use an unassigned ubd device number. - - - - 88..33.. CCrreeaattiinngg aanndd mmoouunnttiinngg tthhee ffiilleessyysstteemm - - Make sure that the filesystem is available, either by being built into - the kernel, or available as a module, then boot up UML and log in. If - the root filesystem doesn't have the filesystem utilities (mkfs, fsck, - etc), then get them into UML by way of the net or hostfs. - - - Make the new filesystem on the device assigned to the new file: - - - host# mkreiserfs /dev/ubd/4 - - - <----------- MKREISERFSv2 -----------> - - ReiserFS version 3.6.25 - Block size 4096 bytes - Block count 25856 - Used blocks 8212 - Journal - 8192 blocks (18-8209), journal header is in block 8210 - Bitmaps: 17 - Root block 8211 - Hash function "r5" - ATTENTION: ALL DATA WILL BE LOST ON '/dev/ubd/4'! (y/n)y - journal size 8192 (from 18) - Initializing journal - 0%....20%....40%....60%....80%....100% - Syncing..done. - - - - - Now, mount it: - - - UML# - mount /dev/ubd/4 /mnt - - - - - and you're in business. - - - - - - - - - - 99.. HHoosstt ffiillee aacccceessss - - - If you want to access files on the host machine from inside UML, you - can treat it as a separate machine and either nfs mount directories - from the host or copy files into the virtual machine with scp or rcp. - However, since UML is running on the host, it can access those - files just like any other process and make them available inside the - virtual machine without needing to use the network. - - - This is now possible with the hostfs virtual filesystem. With it, you - can mount a host directory into the UML filesystem and access the - files contained in it just as you would on the host. - - - 99..11.. UUssiinngg hhoossttffss - - To begin with, make sure that hostfs is available inside the virtual - machine with - - - UML# cat /proc/filesystems - - - - . hostfs should be listed. If it's not, either rebuild the kernel - with hostfs configured into it or make sure that hostfs is built as a - module and available inside the virtual machine, and insmod it. - - - Now all you need to do is run mount: - - - UML# mount none /mnt/host -t hostfs - - - - - will mount the host's / on the virtual machine's /mnt/host. - - - If you don't want to mount the host root directory, then you can - specify a subdirectory to mount with the -o switch to mount: - - - UML# mount none /mnt/home -t hostfs -o /home - - - - - will mount the hosts's /home on the virtual machine's /mnt/home. - - - - 99..22.. hhoossttffss aass tthhee rroooott ffiilleessyysstteemm - - It's possible to boot from a directory hierarchy on the host using - hostfs rather than using the standard filesystem in a file. - - To start, you need that hierarchy. The easiest way is to loop mount - an existing root_fs file: - - - host# mount root_fs uml_root_dir -o loop - - - - - You need to change the filesystem type of / in etc/fstab to be - 'hostfs', so that line looks like this: - - /dev/ubd/0 / hostfs defaults 1 1 - - - - - Then you need to chown to yourself all the files in that directory - that are owned by root. This worked for me: - - - host# find . -uid 0 -exec chown jdike {} \; - - - - - Next, make sure that your UML kernel has hostfs compiled in, not as a - module. Then run UML with the boot device pointing at that directory: - - - ubd0=/path/to/uml/root/directory - - - - - UML should then boot as it does normally. - - - 99..33.. BBuuiillddiinngg hhoossttffss - - If you need to build hostfs because it's not in your kernel, you have - two choices: - - - - +o Compiling hostfs into the kernel: - - - Reconfigure the kernel and set the 'Host filesystem' option under - - - +o Compiling hostfs as a module: - - - Reconfigure the kernel and set the 'Host filesystem' option under - be in arch/um/fs/hostfs/hostfs.o. Install that in - /lib/modules/`uname -r`/fs in the virtual machine, boot it up, and - - - UML# insmod hostfs - - - - - - - - - - - - - 1100.. TThhee MMaannaaggeemmeenntt CCoonnssoollee - - - - The UML management console is a low-level interface to the kernel, - somewhat like the i386 SysRq interface. Since there is a full-blown - operating system under UML, there is much greater flexibility possible - than with the SysRq mechanism. - - - There are a number of things you can do with the mconsole interface: - - +o get the kernel version - - +o add and remove devices - - +o halt or reboot the machine - - +o Send SysRq commands - - +o Pause and resume the UML - - - You need the mconsole client (uml_mconsole) which is present in CVS - (/tools/mconsole) in 2.4.5-9um and later, and will be in the RPM in - 2.4.6. - - - You also need CONFIG_MCONSOLE (under 'General Setup') enabled in UML. - When you boot UML, you'll see a line like: - - - mconsole initialized on /home/jdike/.uml/umlNJ32yL/mconsole - - - - - If you specify a unique machine id one the UML command line, i.e. - - - umid=debian - - - - - you'll see this - - - mconsole initialized on /home/jdike/.uml/debian/mconsole - - - - - That file is the socket that uml_mconsole will use to communicate with - UML. Run it with either the umid or the full path as its argument: - - - host% uml_mconsole debian - - - - - or - - - host% uml_mconsole /home/jdike/.uml/debian/mconsole - - - - - You'll get a prompt, at which you can run one of these commands: - - +o version - - +o halt - - +o reboot - - +o config - - +o remove - - +o sysrq - - +o help - - +o cad - - +o stop - - +o go - - - 1100..11.. vveerrssiioonn - - This takes no arguments. It prints the UML version. - - - (mconsole) version - OK Linux usermode 2.4.5-9um #1 Wed Jun 20 22:47:08 EDT 2001 i686 - - - - - There are a couple actual uses for this. It's a simple no-op which - can be used to check that a UML is running. It's also a way of - sending an interrupt to the UML. This is sometimes useful on SMP - hosts, where there's a bug which causes signals to UML to be lost, - often causing it to appear to hang. Sending such a UML the mconsole - version command is a good way to 'wake it up' before networking has - been enabled, as it does not do anything to the function of the UML. - - - - 1100..22.. hhaalltt aanndd rreebboooott - - These take no arguments. They shut the machine down immediately, with - no syncing of disks and no clean shutdown of userspace. So, they are - pretty close to crashing the machine. - - - (mconsole) halt - OK - - - - - - - 1100..33.. ccoonnffiigg - - "config" adds a new device to the virtual machine. Currently the ubd - and network drivers support this. It takes one argument, which is the - device to add, with the same syntax as the kernel command line. - - - - - (mconsole) - config ubd3=/home/jdike/incoming/roots/root_fs_debian22 - - OK - (mconsole) config eth1=mcast - OK - - - - - - - 1100..44.. rreemmoovvee - - "remove" deletes a device from the system. Its argument is just the - name of the device to be removed. The device must be idle in whatever - sense the driver considers necessary. In the case of the ubd driver, - the removed block device must not be mounted, swapped on, or otherwise - open, and in the case of the network driver, the device must be down. - - - (mconsole) remove ubd3 - OK - (mconsole) remove eth1 - OK - - - - - - - 1100..55.. ssyyssrrqq - - This takes one argument, which is a single letter. It calls the - generic kernel's SysRq driver, which does whatever is called for by - that argument. See the SysRq documentation in Documentation/sysrq.txt - in your favorite kernel tree to see what letters are valid and what - they do. - - - - 1100..66.. hheellpp - - "help" returns a string listing the valid commands and what each one - does. - - - - 1100..77.. ccaadd - - This invokes the Ctl-Alt-Del action on init. What exactly this ends - up doing is up to /etc/inittab. Normally, it reboots the machine. - With UML, this is usually not desired, so if a halt would be better, - then find the section of inittab that looks like this - - - # What to do when CTRL-ALT-DEL is pressed. - ca:12345:ctrlaltdel:/sbin/shutdown -t1 -a -r now - - - - - and change the command to halt. - - - - 1100..88.. ssttoopp - - This puts the UML in a loop reading mconsole requests until a 'go' - mconsole command is received. This is very useful for making backups - of UML filesystems, as the UML can be stopped, then synced via 'sysrq - s', so that everything is written to the filesystem. You can then copy - the filesystem and then send the UML 'go' via mconsole. - - - Note that a UML running with more than one CPU will have problems - after you send the 'stop' command, as only one CPU will be held in a - mconsole loop and all others will continue as normal. This is a bug, - and will be fixed. - - - - 1100..99.. ggoo - - This resumes a UML after being paused by a 'stop' command. Note that - when the UML has resumed, TCP connections may have timed out and if - the UML is paused for a long period of time, crond might go a little - crazy, running all the jobs it didn't do earlier. - - - - - - - - - 1111.. KKeerrnneell ddeebbuuggggiinngg - - - NNoottee:: The interface that makes debugging, as described here, possible - is present in 2.4.0-test6 kernels and later. - - - Since the user-mode kernel runs as a normal Linux process, it is - possible to debug it with gdb almost like any other process. It is - slightly different because the kernel's threads are already being - ptraced for system call interception, so gdb can't ptrace them. - However, a mechanism has been added to work around that problem. - - - In order to debug the kernel, you need build it from source. See - ``Compiling the kernel and modules'' for information on doing that. - Make sure that you enable CONFIG_DEBUGSYM and CONFIG_PT_PROXY during - the config. These will compile the kernel with -g, and enable the - ptrace proxy so that gdb works with UML, respectively. - - - - - 1111..11.. SSttaarrttiinngg tthhee kkeerrnneell uunnddeerr ggddbb - - You can have the kernel running under the control of gdb from the - beginning by putting 'debug' on the command line. You will get an - xterm with gdb running inside it. The kernel will send some commands - to gdb which will leave it stopped at the beginning of start_kernel. - At this point, you can get things going with 'next', 'step', or - 'cont'. - - - There is a transcript of a debugging session here , with breakpoints being set in the scheduler and in an - interrupt handler. - 1111..22.. EExxaammiinniinngg sslleeeeppiinngg pprroocceesssseess - - Not every bug is evident in the currently running process. Sometimes, - processes hang in the kernel when they shouldn't because they've - deadlocked on a semaphore or something similar. In this case, when - you ^C gdb and get a backtrace, you will see the idle thread, which - isn't very relevant. - - - What you want is the stack of whatever process is sleeping when it - shouldn't be. You need to figure out which process that is, which is - generally fairly easy. Then you need to get its host process id, - which you can do either by looking at ps on the host or at - task.thread.extern_pid in gdb. - - - Now what you do is this: - - +o detach from the current thread - - - (UML gdb) det - - - - - - +o attach to the thread you are interested in - - - (UML gdb) att - - - - - - +o look at its stack and anything else of interest - - - (UML gdb) bt - - - - - Note that you can't do anything at this point that requires that a - process execute, e.g. calling a function - - +o when you're done looking at that process, reattach to the current - thread and continue it - - - (UML gdb) - att 1 - - - - - - - (UML gdb) - c - - - - - Here, specifying any pid which is not the process id of a UML thread - will cause gdb to reattach to the current thread. I commonly use 1, - but any other invalid pid would work. - - - - 1111..33.. RRuunnnniinngg dddddd oonn UUMMLL - - ddd works on UML, but requires a special kludge. The process goes - like this: - - +o Start ddd - - - host% ddd linux - - - - - - +o With ps, get the pid of the gdb that ddd started. You can ask the - gdb to tell you, but for some reason that confuses things and - causes a hang. - - +o run UML with 'debug=parent gdb-pid=' added to the command line - - it will just sit there after you hit return - - +o type 'att 1' to the ddd gdb and you will see something like - - - 0xa013dc51 in __kill () - - - (gdb) - - - - - - +o At this point, type 'c', UML will boot up, and you can use ddd just - as you do on any other process. - - - - 1111..44.. DDeebbuuggggiinngg mmoodduulleess - - gdb has support for debugging code which is dynamically loaded into - the process. This support is what is needed to debug kernel modules - under UML. - - - Using that support is somewhat complicated. You have to tell gdb what - object file you just loaded into UML and where in memory it is. Then, - it can read the symbol table, and figure out where all the symbols are - from the load address that you provided. It gets more interesting - when you load the module again (i.e. after an rmmod). You have to - tell gdb to forget about all its symbols, including the main UML ones - for some reason, then load then all back in again. - - - There's an easy way and a hard way to do this. The easy way is to use - the umlgdb expect script written by Chandan Kudige. It basically - automates the process for you. - - - First, you must tell it where your modules are. There is a list in - the script that looks like this: - set MODULE_PATHS { - "fat" "/usr/src/uml/linux-2.4.18/fs/fat/fat.o" - "isofs" "/usr/src/uml/linux-2.4.18/fs/isofs/isofs.o" - "minix" "/usr/src/uml/linux-2.4.18/fs/minix/minix.o" - } - - - - - You change that to list the names and paths of the modules that you - are going to debug. Then you run it from the toplevel directory of - your UML pool and it basically tells you what to do: - - - - - ******** GDB pid is 21903 ******** - Start UML as: ./linux debug gdb-pid=21903 - - - - GNU gdb 5.0rh-5 Red Hat Linux 7.1 - Copyright 2001 Free Software Foundation, Inc. - GDB is free software, covered by the GNU General Public License, and you are - welcome to change it and/or distribute copies of it under certain conditions. - Type "show copying" to see the conditions. - There is absolutely no warranty for GDB. Type "show warranty" for details. - This GDB was configured as "i386-redhat-linux"... - (gdb) b sys_init_module - Breakpoint 1 at 0xa0011923: file module.c, line 349. - (gdb) att 1 - - - - - After you run UML and it sits there doing nothing, you hit return at - the 'att 1' and continue it: - - - Attaching to program: /home/jdike/linux/2.4/um/./linux, process 1 - 0xa00f4221 in __kill () - (UML gdb) c - Continuing. - - - - - At this point, you debug normally. When you insmod something, the - expect magic will kick in and you'll see something like: - - - - - - - - - - - - - - - - - - *** Module hostfs loaded *** - Breakpoint 1, sys_init_module (name_user=0x805abb0 "hostfs", - mod_user=0x8070e00) at module.c:349 - 349 char *name, *n_name, *name_tmp = NULL; - (UML gdb) finish - Run till exit from #0 sys_init_module (name_user=0x805abb0 "hostfs", - mod_user=0x8070e00) at module.c:349 - 0xa00e2e23 in execute_syscall (r=0xa8140284) at syscall_kern.c:411 - 411 else res = EXECUTE_SYSCALL(syscall, regs); - Value returned is $1 = 0 - (UML gdb) - p/x (int)module_list + module_list->size_of_struct - - $2 = 0xa9021054 - (UML gdb) symbol-file ./linux - Load new symbol table from "./linux"? (y or n) y - Reading symbols from ./linux... - done. - (UML gdb) - add-symbol-file /home/jdike/linux/2.4/um/arch/um/fs/hostfs/hostfs.o 0xa9021054 - - add symbol table from file "/home/jdike/linux/2.4/um/arch/um/fs/hostfs/hostfs.o" at - .text_addr = 0xa9021054 - (y or n) y - - Reading symbols from /home/jdike/linux/2.4/um/arch/um/fs/hostfs/hostfs.o... - done. - (UML gdb) p *module_list - $1 = {size_of_struct = 84, next = 0xa0178720, name = 0xa9022de0 "hostfs", - size = 9016, uc = {usecount = {counter = 0}, pad = 0}, flags = 1, - nsyms = 57, ndeps = 0, syms = 0xa9023170, deps = 0x0, refs = 0x0, - init = 0xa90221f0 , cleanup = 0xa902222c , - ex_table_start = 0x0, ex_table_end = 0x0, persist_start = 0x0, - persist_end = 0x0, can_unload = 0, runsize = 0, kallsyms_start = 0x0, - kallsyms_end = 0x0, - archdata_start = 0x1b855
, - archdata_end = 0xe5890000
, - kernel_data = 0xf689c35d
} - >> Finished loading symbols for hostfs ... - - - - - That's the easy way. It's highly recommended. The hard way is - described below in case you're interested in what's going on. - - - Boot the kernel under the debugger and load the module with insmod or - modprobe. With gdb, do: - - - (UML gdb) p module_list - - - - - This is a list of modules that have been loaded into the kernel, with - the most recently loaded module first. Normally, the module you want - is at module_list. If it's not, walk down the next links, looking at - the name fields until find the module you want to debug. Take the - address of that structure, and add module.size_of_struct (which in - 2.4.10 kernels is 96 (0x60)) to it. Gdb can make this hard addition - for you :-): - - - - (UML gdb) - printf "%#x\n", (int)module_list module_list->size_of_struct - - - - - The offset from the module start occasionally changes (before 2.4.0, - it was module.size_of_struct + 4), so it's a good idea to check the - init and cleanup addresses once in a while, as describe below. Now - do: - - - (UML gdb) - add-symbol-file /path/to/module/on/host that_address - - - - - Tell gdb you really want to do it, and you're in business. - - - If there's any doubt that you got the offset right, like breakpoints - appear not to work, or they're appearing in the wrong place, you can - check it by looking at the module structure. The init and cleanup - fields should look like: - - - init = 0x588066b0 , cleanup = 0x588066c0 - - - - - with no offsets on the symbol names. If the names are right, but they - are offset, then the offset tells you how much you need to add to the - address you gave to add-symbol-file. - - - When you want to load in a new version of the module, you need to get - gdb to forget about the old one. The only way I've found to do that - is to tell gdb to forget about all symbols that it knows about: - - - (UML gdb) symbol-file - - - - - Then reload the symbols from the kernel binary: - - - (UML gdb) symbol-file /path/to/kernel - - - - - and repeat the process above. You'll also need to re-enable break- - points. They were disabled when you dumped all the symbols because - gdb couldn't figure out where they should go. - - - - 1111..55.. AAttttaacchhiinngg ggddbb ttoo tthhee kkeerrnneell - - If you don't have the kernel running under gdb, you can attach gdb to - it later by sending the tracing thread a SIGUSR1. The first line of - the console output identifies its pid: - tracing thread pid = 20093 - - - - - When you send it the signal: - - - host% kill -USR1 20093 - - - - - you will get an xterm with gdb running in it. - - - If you have the mconsole compiled into UML, then the mconsole client - can be used to start gdb: - - - (mconsole) (mconsole) config gdb=xterm - - - - - will fire up an xterm with gdb running in it. - - - - 1111..66.. UUssiinngg aalltteerrnnaattee ddeebbuuggggeerrss - - UML has support for attaching to an already running debugger rather - than starting gdb itself. This is present in CVS as of 17 Apr 2001. - I sent it to Alan for inclusion in the ac tree, and it will be in my - 2.4.4 release. - - - This is useful when gdb is a subprocess of some UI, such as emacs or - ddd. It can also be used to run debuggers other than gdb on UML. - Below is an example of using strace as an alternate debugger. - - - To do this, you need to get the pid of the debugger and pass it in - with the - - - If you are using gdb under some UI, then tell it to 'att 1', and - you'll find yourself attached to UML. - - - If you are using something other than gdb as your debugger, then - you'll need to get it to do the equivalent of 'att 1' if it doesn't do - it automatically. - - - An example of an alternate debugger is strace. You can strace the - actual kernel as follows: - - +o Run the following in a shell - - - host% - sh -c 'echo pid=$$; echo -n hit return; read x; exec strace -p 1 -o strace.out' - - - - +o Run UML with 'debug' and 'gdb-pid=' with the pid printed out - by the previous command - - +o Hit return in the shell, and UML will start running, and strace - output will start accumulating in the output file. - - Note that this is different from running - - - host% strace ./linux - - - - - That will strace only the main UML thread, the tracing thread, which - doesn't do any of the actual kernel work. It just oversees the vir- - tual machine. In contrast, using strace as described above will show - you the low-level activity of the virtual machine. - - - - - - 1122.. KKeerrnneell ddeebbuuggggiinngg eexxaammpplleess - - 1122..11.. TThhee ccaassee ooff tthhee hhuunngg ffsscckk - - When booting up the kernel, fsck failed, and dropped me into a shell - to fix things up. I ran fsck -y, which hung: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Setting hostname uml [ OK ] - Checking root filesystem - /dev/fhd0 was not cleanly unmounted, check forced. - Error reading block 86894 (Attempt to read block from filesystem resulted in short read) while reading indirect blocks of inode 19780. - - /dev/fhd0: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY. - (i.e., without -a or -p options) - [ FAILED ] - - *** An error occurred during the file system check. - *** Dropping you to a shell; the system will reboot - *** when you leave the shell. - Give root password for maintenance - (or type Control-D for normal startup): - - [root@uml /root]# fsck -y /dev/fhd0 - fsck -y /dev/fhd0 - Parallelizing fsck version 1.14 (9-Jan-1999) - e2fsck 1.14, 9-Jan-1999 for EXT2 FS 0.5b, 95/08/09 - /dev/fhd0 contains a file system with errors, check forced. - Pass 1: Checking inodes, blocks, and sizes - Error reading block 86894 (Attempt to read block from filesystem resulted in short read) while reading indirect blocks of inode 19780. Ignore error? yes - - Inode 19780, i_blocks is 1548, should be 540. Fix? yes - - Pass 2: Checking directory structure - Error reading block 49405 (Attempt to read block from filesystem resulted in short read). Ignore error? yes - - Directory inode 11858, block 0, offset 0: directory corrupted - Salvage? yes - - Missing '.' in directory inode 11858. - Fix? yes - - Missing '..' in directory inode 11858. - Fix? yes - - - - - - The standard drill in this sort of situation is to fire up gdb on the - signal thread, which, in this case, was pid 1935. In another window, - I run gdb and attach pid 1935. - - - - - ~/linux/2.3.26/um 1016: gdb linux - GNU gdb 4.17.0.11 with Linux support - Copyright 1998 Free Software Foundation, Inc. - GDB is free software, covered by the GNU General Public License, and you are - welcome to change it and/or distribute copies of it under certain conditions. - Type "show copying" to see the conditions. - There is absolutely no warranty for GDB. Type "show warranty" for details. - This GDB was configured as "i386-redhat-linux"... - - (gdb) att 1935 - Attaching to program `/home/dike/linux/2.3.26/um/linux', Pid 1935 - 0x100756d9 in __wait4 () - - - - - - - Let's see what's currently running: - - - - (gdb) p current_task.pid - $1 = 0 - - - - - - It's the idle thread, which means that fsck went to sleep for some - reason and never woke up. - - - Let's guess that the last process in the process list is fsck: - - - - (gdb) p current_task.prev_task.comm - $13 = "fsck.ext2\000\000\000\000\000\000" - - - - - - It is, so let's see what it thinks it's up to: - - - - (gdb) p current_task.prev_task.thread - $14 = {extern_pid = 1980, tracing = 0, want_tracing = 0, forking = 0, - kernel_stack_page = 0, signal_stack = 1342627840, syscall = {id = 4, args = { - 3, 134973440, 1024, 0, 1024}, have_result = 0, result = 50590720}, - request = {op = 2, u = {exec = {ip = 1350467584, sp = 2952789424}, fork = { - regs = {1350467584, 2952789424, 0 }, sigstack = 0, - pid = 0}, switch_to = 0x507e8000, thread = {proc = 0x507e8000, - arg = 0xaffffdb0, flags = 0, new_pid = 0}, input_request = { - op = 1350467584, fd = -1342177872, proc = 0, pid = 0}}}} - - - - - - The interesting things here are the fact that its .thread.syscall.id - is __NR_write (see the big switch in arch/um/kernel/syscall_kern.c or - the defines in include/asm-um/arch/unistd.h), and that it never - returned. Also, its .request.op is OP_SWITCH (see - arch/um/include/user_util.h). These mean that it went into a write, - and, for some reason, called schedule(). - - - The fact that it never returned from write means that its stack should - be fairly interesting. Its pid is 1980 (.thread.extern_pid). That - process is being ptraced by the signal thread, so it must be detached - before gdb can attach it: - - - - - - - - - - - (gdb) call detach(1980) - - Program received signal SIGSEGV, Segmentation fault. - - The program being debugged stopped while in a function called from GDB. - When the function (detach) is done executing, GDB will silently - stop (instead of continuing to evaluate the expression containing - the function call). - (gdb) call detach(1980) - $15 = 0 - - - - - - The first detach segfaults for some reason, and the second one - succeeds. - - - Now I detach from the signal thread, attach to the fsck thread, and - look at its stack: - - - (gdb) det - Detaching from program: /home/dike/linux/2.3.26/um/linux Pid 1935 - (gdb) att 1980 - Attaching to program `/home/dike/linux/2.3.26/um/linux', Pid 1980 - 0x10070451 in __kill () - (gdb) bt - #0 0x10070451 in __kill () - #1 0x10068ccd in usr1_pid (pid=1980) at process.c:30 - #2 0x1006a03f in _switch_to (prev=0x50072000, next=0x507e8000) - at process_kern.c:156 - #3 0x1006a052 in switch_to (prev=0x50072000, next=0x507e8000, last=0x50072000) - at process_kern.c:161 - #4 0x10001d12 in schedule () at sched.c:777 - #5 0x1006a744 in __down (sem=0x507d241c) at semaphore.c:71 - #6 0x1006aa10 in __down_failed () at semaphore.c:157 - #7 0x1006c5d8 in segv_handler (sc=0x5006e940) at trap_user.c:174 - #8 0x1006c5ec in kern_segv_handler (sig=11) at trap_user.c:182 - #9 - #10 0x10155404 in errno () - #11 0x1006c0aa in segv (address=1342179328, is_write=2) at trap_kern.c:50 - #12 0x1006c5d8 in segv_handler (sc=0x5006eaf8) at trap_user.c:174 - #13 0x1006c5ec in kern_segv_handler (sig=11) at trap_user.c:182 - #14 - #15 0xc0fd in ?? () - #16 0x10016647 in sys_write (fd=3, - buf=0x80b8800
, count=1024) - at read_write.c:159 - #17 0x1006d5b3 in execute_syscall (syscall=4, args=0x5006ef08) - at syscall_kern.c:254 - #18 0x1006af87 in really_do_syscall (sig=12) at syscall_user.c:35 - #19 - #20 0x400dc8b0 in ?? () - - - - - - The interesting things here are : - - +o There are two segfaults on this stack (frames 9 and 14) - - +o The first faulting address (frame 11) is 0x50000800 - - (gdb) p (void *)1342179328 - $16 = (void *) 0x50000800 - - - - - - The initial faulting address is interesting because it is on the idle - thread's stack. I had been seeing the idle thread segfault for no - apparent reason, and the cause looked like stack corruption. In hopes - of catching the culprit in the act, I had turned off all protections - to that stack while the idle thread wasn't running. This apparently - tripped that trap. - - - However, the more immediate problem is that second segfault and I'm - going to concentrate on that. First, I want to see where the fault - happened, so I have to go look at the sigcontent struct in frame 8: - - - - (gdb) up - #1 0x10068ccd in usr1_pid (pid=1980) at process.c:30 - 30 kill(pid, SIGUSR1); - (gdb) - #2 0x1006a03f in _switch_to (prev=0x50072000, next=0x507e8000) - at process_kern.c:156 - 156 usr1_pid(getpid()); - (gdb) - #3 0x1006a052 in switch_to (prev=0x50072000, next=0x507e8000, last=0x50072000) - at process_kern.c:161 - 161 _switch_to(prev, next); - (gdb) - #4 0x10001d12 in schedule () at sched.c:777 - 777 switch_to(prev, next, prev); - (gdb) - #5 0x1006a744 in __down (sem=0x507d241c) at semaphore.c:71 - 71 schedule(); - (gdb) - #6 0x1006aa10 in __down_failed () at semaphore.c:157 - 157 } - (gdb) - #7 0x1006c5d8 in segv_handler (sc=0x5006e940) at trap_user.c:174 - 174 segv(sc->cr2, sc->err & 2); - (gdb) - #8 0x1006c5ec in kern_segv_handler (sig=11) at trap_user.c:182 - 182 segv_handler(sc); - (gdb) p *sc - Cannot access memory at address 0x0. - - - - - That's not very useful, so I'll try a more manual method: - - - (gdb) p *((struct sigcontext *) (&sig + 1)) - $19 = {gs = 0, __gsh = 0, fs = 0, __fsh = 0, es = 43, __esh = 0, ds = 43, - __dsh = 0, edi = 1342179328, esi = 1350378548, ebp = 1342630440, - esp = 1342630420, ebx = 1348150624, edx = 1280, ecx = 0, eax = 0, - trapno = 14, err = 4, eip = 268480945, cs = 35, __csh = 0, eflags = 66118, - esp_at_signal = 1342630420, ss = 43, __ssh = 0, fpstate = 0x0, oldmask = 0, - cr2 = 1280} - - - - The ip is in handle_mm_fault: - - - (gdb) p (void *)268480945 - $20 = (void *) 0x1000b1b1 - (gdb) i sym $20 - handle_mm_fault + 57 in section .text - - - - - - Specifically, it's in pte_alloc: - - - (gdb) i line *$20 - Line 124 of "/home/dike/linux/2.3.26/um/include/asm/pgalloc.h" - starts at address 0x1000b1b1 - and ends at 0x1000b1b7 . - - - - - - To find where in handle_mm_fault this is, I'll jump forward in the - code until I see an address in that procedure: - - - - (gdb) i line *0x1000b1c0 - Line 126 of "/home/dike/linux/2.3.26/um/include/asm/pgalloc.h" - starts at address 0x1000b1b7 - and ends at 0x1000b1c3 . - (gdb) i line *0x1000b1d0 - Line 131 of "/home/dike/linux/2.3.26/um/include/asm/pgalloc.h" - starts at address 0x1000b1d0 - and ends at 0x1000b1da . - (gdb) i line *0x1000b1e0 - Line 61 of "/home/dike/linux/2.3.26/um/include/asm/pgalloc.h" - starts at address 0x1000b1da - and ends at 0x1000b1e1 . - (gdb) i line *0x1000b1f0 - Line 134 of "/home/dike/linux/2.3.26/um/include/asm/pgalloc.h" - starts at address 0x1000b1f0 - and ends at 0x1000b200 . - (gdb) i line *0x1000b200 - Line 135 of "/home/dike/linux/2.3.26/um/include/asm/pgalloc.h" - starts at address 0x1000b200 - and ends at 0x1000b208 . - (gdb) i line *0x1000b210 - Line 139 of "/home/dike/linux/2.3.26/um/include/asm/pgalloc.h" - starts at address 0x1000b210 - and ends at 0x1000b219 . - (gdb) i line *0x1000b220 - Line 1168 of "memory.c" starts at address 0x1000b21e - and ends at 0x1000b222 . - - - - - - Something is apparently wrong with the page tables or vma_structs, so - lets go back to frame 11 and have a look at them: - - - - #11 0x1006c0aa in segv (address=1342179328, is_write=2) at trap_kern.c:50 - 50 handle_mm_fault(current, vma, address, is_write); - (gdb) call pgd_offset_proc(vma->vm_mm, address) - $22 = (pgd_t *) 0x80a548c - - - - - - That's pretty bogus. Page tables aren't supposed to be in process - text or data areas. Let's see what's in the vma: - - - (gdb) p *vma - $23 = {vm_mm = 0x507d2434, vm_start = 0, vm_end = 134512640, - vm_next = 0x80a4f8c, vm_page_prot = {pgprot = 0}, vm_flags = 31200, - vm_avl_height = 2058, vm_avl_left = 0x80a8c94, vm_avl_right = 0x80d1000, - vm_next_share = 0xaffffdb0, vm_pprev_share = 0xaffffe63, - vm_ops = 0xaffffe7a, vm_pgoff = 2952789626, vm_file = 0xafffffec, - vm_private_data = 0x62} - (gdb) p *vma.vm_mm - $24 = {mmap = 0x507d2434, mmap_avl = 0x0, mmap_cache = 0x8048000, - pgd = 0x80a4f8c, mm_users = {counter = 0}, mm_count = {counter = 134904288}, - map_count = 134909076, mmap_sem = {count = {counter = 135073792}, - sleepers = -1342177872, wait = {lock = , - task_list = {next = 0xaffffe63, prev = 0xaffffe7a}, - __magic = -1342177670, __creator = -1342177300}, __magic = 98}, - page_table_lock = {}, context = 138, start_code = 0, end_code = 0, - start_data = 0, end_data = 0, start_brk = 0, brk = 0, start_stack = 0, - arg_start = 0, arg_end = 0, env_start = 0, env_end = 0, rss = 1350381536, - total_vm = 0, locked_vm = 0, def_flags = 0, cpu_vm_mask = 0, swap_cnt = 0, - swap_address = 0, segments = 0x0} - - - - - - This also pretty bogus. With all of the 0x80xxxxx and 0xaffffxxx - addresses, this is looking like a stack was plonked down on top of - these structures. Maybe it's a stack overflow from the next page: - - - - (gdb) p vma - $25 = (struct vm_area_struct *) 0x507d2434 - - - - - - That's towards the lower quarter of the page, so that would have to - have been pretty heavy stack overflow: - - - - - - - - - - - - - - - (gdb) x/100x $25 - 0x507d2434: 0x507d2434 0x00000000 0x08048000 0x080a4f8c - 0x507d2444: 0x00000000 0x080a79e0 0x080a8c94 0x080d1000 - 0x507d2454: 0xaffffdb0 0xaffffe63 0xaffffe7a 0xaffffe7a - 0x507d2464: 0xafffffec 0x00000062 0x0000008a 0x00000000 - 0x507d2474: 0x00000000 0x00000000 0x00000000 0x00000000 - 0x507d2484: 0x00000000 0x00000000 0x00000000 0x00000000 - 0x507d2494: 0x00000000 0x00000000 0x507d2fe0 0x00000000 - 0x507d24a4: 0x00000000 0x00000000 0x00000000 0x00000000 - 0x507d24b4: 0x00000000 0x00000000 0x00000000 0x00000000 - 0x507d24c4: 0x00000000 0x00000000 0x00000000 0x00000000 - 0x507d24d4: 0x00000000 0x00000000 0x00000000 0x00000000 - 0x507d24e4: 0x00000000 0x00000000 0x00000000 0x00000000 - 0x507d24f4: 0x00000000 0x00000000 0x00000000 0x00000000 - 0x507d2504: 0x00000000 0x00000000 0x00000000 0x00000000 - 0x507d2514: 0x00000000 0x00000000 0x00000000 0x00000000 - 0x507d2524: 0x00000000 0x00000000 0x00000000 0x00000000 - 0x507d2534: 0x00000000 0x00000000 0x507d25dc 0x00000000 - 0x507d2544: 0x00000000 0x00000000 0x00000000 0x00000000 - 0x507d2554: 0x00000000 0x00000000 0x00000000 0x00000000 - 0x507d2564: 0x00000000 0x00000000 0x00000000 0x00000000 - 0x507d2574: 0x00000000 0x00000000 0x00000000 0x00000000 - 0x507d2584: 0x00000000 0x00000000 0x00000000 0x00000000 - 0x507d2594: 0x00000000 0x00000000 0x00000000 0x00000000 - 0x507d25a4: 0x00000000 0x00000000 0x00000000 0x00000000 - 0x507d25b4: 0x00000000 0x00000000 0x00000000 0x00000000 - - - - - - It's not stack overflow. The only "stack-like" piece of this data is - the vma_struct itself. - - - At this point, I don't see any avenues to pursue, so I just have to - admit that I have no idea what's going on. What I will do, though, is - stick a trap on the segfault handler which will stop if it sees any - writes to the idle thread's stack. That was the thing that happened - first, and it may be that if I can catch it immediately, what's going - on will be somewhat clearer. - - - 1122..22.. EEppiissooddee 22:: TThhee ccaassee ooff tthhee hhuunngg ffsscckk - - After setting a trap in the SEGV handler for accesses to the signal - thread's stack, I reran the kernel. - - - fsck hung again, this time by hitting the trap: - - - - - - - - - - - - - - - - - Setting hostname uml [ OK ] - Checking root filesystem - /dev/fhd0 contains a file system with errors, check forced. - Error reading block 86894 (Attempt to read block from filesystem resulted in short read) while reading indirect blocks of inode 19780. - - /dev/fhd0: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY. - (i.e., without -a or -p options) - [ FAILED ] - - *** An error occurred during the file system check. - *** Dropping you to a shell; the system will reboot - *** when you leave the shell. - Give root password for maintenance - (or type Control-D for normal startup): - - [root@uml /root]# fsck -y /dev/fhd0 - fsck -y /dev/fhd0 - Parallelizing fsck version 1.14 (9-Jan-1999) - e2fsck 1.14, 9-Jan-1999 for EXT2 FS 0.5b, 95/08/09 - /dev/fhd0 contains a file system with errors, check forced. - Pass 1: Checking inodes, blocks, and sizes - Error reading block 86894 (Attempt to read block from filesystem resulted in short read) while reading indirect blocks of inode 19780. Ignore error? yes - - Pass 2: Checking directory structure - Error reading block 49405 (Attempt to read block from filesystem resulted in short read). Ignore error? yes - - Directory inode 11858, block 0, offset 0: directory corrupted - Salvage? yes - - Missing '.' in directory inode 11858. - Fix? yes - - Missing '..' in directory inode 11858. - Fix? yes - - Untested (4127) [100fe44c]: trap_kern.c line 31 - - - - - - I need to get the signal thread to detach from pid 4127 so that I can - attach to it with gdb. This is done by sending it a SIGUSR1, which is - caught by the signal thread, which detaches the process: - - - kill -USR1 4127 - - - - - - Now I can run gdb on it: - - - - - - - - - - - - - - ~/linux/2.3.26/um 1034: gdb linux - GNU gdb 4.17.0.11 with Linux support - Copyright 1998 Free Software Foundation, Inc. - GDB is free software, covered by the GNU General Public License, and you are - welcome to change it and/or distribute copies of it under certain conditions. - Type "show copying" to see the conditions. - There is absolutely no warranty for GDB. Type "show warranty" for details. - This GDB was configured as "i386-redhat-linux"... - (gdb) att 4127 - Attaching to program `/home/dike/linux/2.3.26/um/linux', Pid 4127 - 0x10075891 in __libc_nanosleep () - - - - - - The backtrace shows that it was in a write and that the fault address - (address in frame 3) is 0x50000800, which is right in the middle of - the signal thread's stack page: - - - (gdb) bt - #0 0x10075891 in __libc_nanosleep () - #1 0x1007584d in __sleep (seconds=1000000) - at ../sysdeps/unix/sysv/linux/sleep.c:78 - #2 0x1006ce9a in stop () at user_util.c:191 - #3 0x1006bf88 in segv (address=1342179328, is_write=2) at trap_kern.c:31 - #4 0x1006c628 in segv_handler (sc=0x5006eaf8) at trap_user.c:174 - #5 0x1006c63c in kern_segv_handler (sig=11) at trap_user.c:182 - #6 - #7 0xc0fd in ?? () - #8 0x10016647 in sys_write (fd=3, buf=0x80b8800 "R.", count=1024) - at read_write.c:159 - #9 0x1006d603 in execute_syscall (syscall=4, args=0x5006ef08) - at syscall_kern.c:254 - #10 0x1006af87 in really_do_syscall (sig=12) at syscall_user.c:35 - #11 - #12 0x400dc8b0 in ?? () - #13 - #14 0x400dc8b0 in ?? () - #15 0x80545fd in ?? () - #16 0x804daae in ?? () - #17 0x8054334 in ?? () - #18 0x804d23e in ?? () - #19 0x8049632 in ?? () - #20 0x80491d2 in ?? () - #21 0x80596b5 in ?? () - (gdb) p (void *)1342179328 - $3 = (void *) 0x50000800 - - - - - - Going up the stack to the segv_handler frame and looking at where in - the code the access happened shows that it happened near line 110 of - block_dev.c: - - - - - - - - - - (gdb) up - #1 0x1007584d in __sleep (seconds=1000000) - at ../sysdeps/unix/sysv/linux/sleep.c:78 - ../sysdeps/unix/sysv/linux/sleep.c:78: No such file or directory. - (gdb) - #2 0x1006ce9a in stop () at user_util.c:191 - 191 while(1) sleep(1000000); - (gdb) - #3 0x1006bf88 in segv (address=1342179328, is_write=2) at trap_kern.c:31 - 31 KERN_UNTESTED(); - (gdb) - #4 0x1006c628 in segv_handler (sc=0x5006eaf8) at trap_user.c:174 - 174 segv(sc->cr2, sc->err & 2); - (gdb) p *sc - $1 = {gs = 0, __gsh = 0, fs = 0, __fsh = 0, es = 43, __esh = 0, ds = 43, - __dsh = 0, edi = 1342179328, esi = 134973440, ebp = 1342631484, - esp = 1342630864, ebx = 256, edx = 0, ecx = 256, eax = 1024, trapno = 14, - err = 6, eip = 268550834, cs = 35, __csh = 0, eflags = 66070, - esp_at_signal = 1342630864, ss = 43, __ssh = 0, fpstate = 0x0, oldmask = 0, - cr2 = 1342179328} - (gdb) p (void *)268550834 - $2 = (void *) 0x1001c2b2 - (gdb) i sym $2 - block_write + 1090 in section .text - (gdb) i line *$2 - Line 209 of "/home/dike/linux/2.3.26/um/include/asm/arch/string.h" - starts at address 0x1001c2a1 - and ends at 0x1001c2bf . - (gdb) i line *0x1001c2c0 - Line 110 of "block_dev.c" starts at address 0x1001c2bf - and ends at 0x1001c2e3 . - - - - - - Looking at the source shows that the fault happened during a call to - copy_to_user to copy the data into the kernel: - - - 107 count -= chars; - 108 copy_from_user(p,buf,chars); - 109 p += chars; - 110 buf += chars; - - - - - - p is the pointer which must contain 0x50000800, since buf contains - 0x80b8800 (frame 8 above). It is defined as: - - - p = offset + bh->b_data; - - - - - - I need to figure out what bh is, and it just so happens that bh is - passed as an argument to mark_buffer_uptodate and mark_buffer_dirty a - few lines later, so I do a little disassembly: - - - - - (gdb) disas 0x1001c2bf 0x1001c2e0 - Dump of assembler code from 0x1001c2bf to 0x1001c2d0: - 0x1001c2bf : addl %eax,0xc(%ebp) - 0x1001c2c2 : movl 0xfffffdd4(%ebp),%edx - 0x1001c2c8 : btsl $0x0,0x18(%edx) - 0x1001c2cd : btsl $0x1,0x18(%edx) - 0x1001c2d2 : sbbl %ecx,%ecx - 0x1001c2d4 : testl %ecx,%ecx - 0x1001c2d6 : jne 0x1001c2e3 - 0x1001c2d8 : pushl $0x0 - 0x1001c2da : pushl %edx - 0x1001c2db : call 0x1001819c <__mark_buffer_dirty> - End of assembler dump. - - - - - - At that point, bh is in %edx (address 0x1001c2da), which is calculated - at 0x1001c2c2 as %ebp + 0xfffffdd4, so I figure exactly what that is, - taking %ebp from the sigcontext_struct above: - - - (gdb) p (void *)1342631484 - $5 = (void *) 0x5006ee3c - (gdb) p 0x5006ee3c+0xfffffdd4 - $6 = 1342630928 - (gdb) p (void *)$6 - $7 = (void *) 0x5006ec10 - (gdb) p *((void **)$7) - $8 = (void *) 0x50100200 - - - - - - Now, I look at the structure to see what's in it, and particularly, - what its b_data field contains: - - - (gdb) p *((struct buffer_head *)0x50100200) - $13 = {b_next = 0x50289380, b_blocknr = 49405, b_size = 1024, b_list = 0, - b_dev = 15872, b_count = {counter = 1}, b_rdev = 15872, b_state = 24, - b_flushtime = 0, b_next_free = 0x501001a0, b_prev_free = 0x50100260, - b_this_page = 0x501001a0, b_reqnext = 0x0, b_pprev = 0x507fcf58, - b_data = 0x50000800 "", b_page = 0x50004000, - b_end_io = 0x10017f60 , b_dev_id = 0x0, - b_rsector = 98810, b_wait = {lock = , - task_list = {next = 0x50100248, prev = 0x50100248}, __magic = 1343226448, - __creator = 0}, b_kiobuf = 0x0} - - - - - - The b_data field is indeed 0x50000800, so the question becomes how - that happened. The rest of the structure looks fine, so this probably - is not a case of data corruption. It happened on purpose somehow. - - - The b_page field is a pointer to the page_struct representing the - 0x50000000 page. Looking at it shows the kernel's idea of the state - of that page: - - - - (gdb) p *$13.b_page - $17 = {list = {next = 0x50004a5c, prev = 0x100c5174}, mapping = 0x0, - index = 0, next_hash = 0x0, count = {counter = 1}, flags = 132, lru = { - next = 0x50008460, prev = 0x50019350}, wait = { - lock = , task_list = {next = 0x50004024, - prev = 0x50004024}, __magic = 1342193708, __creator = 0}, - pprev_hash = 0x0, buffers = 0x501002c0, virtual = 1342177280, - zone = 0x100c5160} - - - - - - Some sanity-checking: the virtual field shows the "virtual" address of - this page, which in this kernel is the same as its "physical" address, - and the page_struct itself should be mem_map[0], since it represents - the first page of memory: - - - - (gdb) p (void *)1342177280 - $18 = (void *) 0x50000000 - (gdb) p mem_map - $19 = (mem_map_t *) 0x50004000 - - - - - - These check out fine. - - - Now to check out the page_struct itself. In particular, the flags - field shows whether the page is considered free or not: - - - (gdb) p (void *)132 - $21 = (void *) 0x84 - - - - - - The "reserved" bit is the high bit, which is definitely not set, so - the kernel considers the signal stack page to be free and available to - be used. - - - At this point, I jump to conclusions and start looking at my early - boot code, because that's where that page is supposed to be reserved. - - - In my setup_arch procedure, I have the following code which looks just - fine: - - - - bootmap_size = init_bootmem(start_pfn, end_pfn - start_pfn); - free_bootmem(__pa(low_physmem) + bootmap_size, high_physmem - low_physmem); - - - - - - Two stack pages have already been allocated, and low_physmem points to - the third page, which is the beginning of free memory. - The init_bootmem call declares the entire memory to the boot memory - manager, which marks it all reserved. The free_bootmem call frees up - all of it, except for the first two pages. This looks correct to me. - - - So, I decide to see init_bootmem run and make sure that it is marking - those first two pages as reserved. I never get that far. - - - Stepping into init_bootmem, and looking at bootmem_map before looking - at what it contains shows the following: - - - - (gdb) p bootmem_map - $3 = (void *) 0x50000000 - - - - - - Aha! The light dawns. That first page is doing double duty as a - stack and as the boot memory map. The last thing that the boot memory - manager does is to free the pages used by its memory map, so this page - is getting freed even its marked as reserved. - - - The fix was to initialize the boot memory manager before allocating - those two stack pages, and then allocate them through the boot memory - manager. After doing this, and fixing a couple of subsequent buglets, - the stack corruption problem disappeared. - - - - - - 1133.. WWhhaatt ttoo ddoo wwhheenn UUMMLL ddooeessnn''tt wwoorrkk - - - - - 1133..11.. SSttrraannggee ccoommppiillaattiioonn eerrrroorrss wwhheenn yyoouu bbuuiilldd ffrroomm ssoouurrccee - - As of test11, it is necessary to have "ARCH=um" in the environment or - on the make command line for all steps in building UML, including - clean, distclean, or mrproper, config, menuconfig, or xconfig, dep, - and linux. If you forget for any of them, the i386 build seems to - contaminate the UML build. If this happens, start from scratch with - - - host% - make mrproper ARCH=um - - - - - and repeat the build process with ARCH=um on all the steps. - - - See ``Compiling the kernel and modules'' for more details. - - - Another cause of strange compilation errors is building UML in - /usr/src/linux. If you do this, the first thing you need to do is - clean up the mess you made. The /usr/src/linux/asm link will now - point to /usr/src/linux/asm-um. Make it point back to - /usr/src/linux/asm-i386. Then, move your UML pool someplace else and - build it there. Also see below, where a more specific set of symptoms - is described. - - - - 1133..33.. AA vvaarriieettyy ooff ppaanniiccss aanndd hhaannggss wwiitthh //ttmmpp oonn aa rreeiisseerrffss ffiilleessyyss-- - tteemm - - I saw this on reiserfs 3.5.21 and it seems to be fixed in 3.5.27. - Panics preceded by - - - Detaching pid nnnn - - - - are diagnostic of this problem. This is a reiserfs bug which causes a - thread to occasionally read stale data from a mmapped page shared with - another thread. The fix is to upgrade the filesystem or to have /tmp - be an ext2 filesystem. - - - - 1133..44.. TThhee ccoommppiillee ffaaiillss wwiitthh eerrrroorrss aabboouutt ccoonnfflliiccttiinngg ttyyppeess ffoorr - ''ooppeenn'',, ''dduupp'',, aanndd ''wwaaiittppiidd'' - - This happens when you build in /usr/src/linux. The UML build makes - the include/asm link point to include/asm-um. /usr/include/asm points - to /usr/src/linux/include/asm, so when that link gets moved, files - which need to include the asm-i386 versions of headers get the - incompatible asm-um versions. The fix is to move the include/asm link - back to include/asm-i386 and to do UML builds someplace else. - - - - 1133..55.. UUMMLL ddooeessnn''tt wwoorrkk wwhheenn //ttmmpp iiss aann NNFFSS ffiilleessyysstteemm - - This seems to be a similar situation with the ReiserFS problem above. - Some versions of NFS seems not to handle mmap correctly, which UML - depends on. The workaround is have /tmp be a non-NFS directory. - - - 1133..66.. UUMMLL hhaannggss oonn bboooott wwhheenn ccoommppiilleedd wwiitthh ggpprrooff ssuuppppoorrtt - - If you build UML with gprof support and, early in the boot, it does - this - - - kernel BUG at page_alloc.c:100! - - - - - you have a buggy gcc. You can work around the problem by removing - UM_FASTCALL from CFLAGS in arch/um/Makefile-i386. This will open up - another bug, but that one is fairly hard to reproduce. - - - - 1133..77.. ssyyssllooggdd ddiieess wwiitthh aa SSIIGGTTEERRMM oonn ssttaarrttuupp - - The exact boot error depends on the distribution that you're booting, - but Debian produces this: - - - /etc/rc2.d/S10sysklogd: line 49: 93 Terminated - start-stop-daemon --start --quiet --exec /sbin/syslogd -- $SYSLOGD - - - - - This is a syslogd bug. There's a race between a parent process - installing a signal handler and its child sending the signal. See - this uml-devel post for the details. - - - - 1133..88.. TTUUNN//TTAAPP nneettwwoorrkkiinngg ddooeessnn''tt wwoorrkk oonn aa 22..44 hhoosstt - - There are a couple of problems which were - name="pointed - out"> by Tim Robinson - - +o It doesn't work on hosts running 2.4.7 (or thereabouts) or earlier. - The fix is to upgrade to something more recent and then read the - next item. - - +o If you see - - - File descriptor in bad state - - - - when you bring up the device inside UML, you have a header mismatch - between the original kernel and the upgraded one. Make /usr/src/linux - point at the new headers. This will only be a problem if you build - uml_net yourself. - - - - 1133..99.. YYoouu ccaann nneettwwoorrkk ttoo tthhee hhoosstt bbuutt nnoott ttoo ootthheerr mmaacchhiinneess oonn tthhee - nneett - - If you can connect to the host, and the host can connect to UML, but - you cannot connect to any other machines, then you may need to enable - IP Masquerading on the host. Usually this is only experienced when - using private IP addresses (192.168.x.x or 10.x.x.x) for host/UML - networking, rather than the public address space that your host is - connected to. UML does not enable IP Masquerading, so you will need - to create a static rule to enable it: - - - host% - iptables -t nat -A POSTROUTING -o eth0 -j MASQUERADE - - - - - Replace eth0 with the interface that you use to talk to the rest of - the world. - - - Documentation on IP Masquerading, and SNAT, can be found at - www.netfilter.org . - - - If you can reach the local net, but not the outside Internet, then - that is usually a routing problem. The UML needs a default route: - - - UML# - route add default gw gateway IP - - - - - The gateway IP can be any machine on the local net that knows how to - reach the outside world. Usually, this is the host or the local net- - work's gateway. - - - Occasionally, we hear from someone who can reach some machines, but - not others on the same net, or who can reach some ports on other - machines, but not others. These are usually caused by strange - firewalling somewhere between the UML and the other box. You track - this down by running tcpdump on every interface the packets travel - over and see where they disappear. When you find a machine that takes - the packets in, but does not send them onward, that's the culprit. - - - - 1133..1100.. II hhaavvee nnoo rroooott aanndd II wwaanntt ttoo ssccrreeaamm - - Thanks to Birgit Wahlich for telling me about this strange one. It - turns out that there's a limit of six environment variables on the - kernel command line. When that limit is reached or exceeded, argument - processing stops, which means that the 'root=' argument that UML - usually adds is not seen. So, the filesystem has no idea what the - root device is, so it panics. - - - The fix is to put less stuff on the command line. Glomming all your - setup variables into one is probably the best way to go. - - - - 1133..1111.. UUMMLL bbuuiilldd ccoonnfflliicctt bbeettwweeeenn ppttrraaccee..hh aanndd uuccoonntteexxtt..hh - - On some older systems, /usr/include/asm/ptrace.h and - /usr/include/sys/ucontext.h define the same names. So, when they're - included together, the defines from one completely mess up the parsing - of the other, producing errors like: - /usr/include/sys/ucontext.h:47: parse error before - `10' - - - - - plus a pile of warnings. - - - This is a libc botch, which has since been fixed, and I don't see any - way around it besides upgrading. - - - - 1133..1122.. TThhee UUMMLL BBooggooMMiippss iiss eexxaaccttllyy hhaallff tthhee hhoosstt''ss BBooggooMMiippss - - On i386 kernels, there are two ways of running the loop that is used - to calculate the BogoMips rating, using the TSC if it's there or using - a one-instruction loop. The TSC produces twice the BogoMips as the - loop. UML uses the loop, since it has nothing resembling a TSC, and - will get almost exactly the same BogoMips as a host using the loop. - However, on a host with a TSC, its BogoMips will be double the loop - BogoMips, and therefore double the UML BogoMips. - - - - 1133..1133.. WWhheenn yyoouu rruunn UUMMLL,, iitt iimmmmeeddiiaatteellyy sseeggffaauullttss - - If the host is configured with the 2G/2G address space split, that's - why. See ``UML on 2G/2G hosts'' for the details on getting UML to - run on your host. - - - - 1133..1144.. xxtteerrmmss aappppeeaarr,, tthheenn iimmmmeeddiiaatteellyy ddiissaappppeeaarr - - If you're running an up to date kernel with an old release of - uml_utilities, the port-helper program will not work properly, so - xterms will exit straight after they appear. The solution is to - upgrade to the latest release of uml_utilities. Usually this problem - occurs when you have installed a packaged release of UML then compiled - your own development kernel without upgrading the uml_utilities from - the source distribution. - - - - 1133..1155.. AAnnyy ootthheerr ppaanniicc,, hhaanngg,, oorr ssttrraannggee bbeehhaavviioorr - - If you're seeing truly strange behavior, such as hangs or panics that - happen in random places, or you try running the debugger to see what's - happening and it acts strangely, then it could be a problem in the - host kernel. If you're not running a stock Linus or -ac kernel, then - try that. An early version of the preemption patch and a 2.4.10 SuSE - kernel have caused very strange problems in UML. - - - Otherwise, let me know about it. Send a message to one of the UML - mailing lists - either the developer list - user-mode-linux-devel at - lists dot sourceforge dot net (subscription info) or the user list - - user-mode-linux-user at lists dot sourceforge do net (subscription - info), whichever you prefer. Don't assume that everyone knows about - it and that a fix is imminent. - - - If you want to be super-helpful, read ``Diagnosing Problems'' and - follow the instructions contained therein. - 1144.. DDiiaaggnnoossiinngg PPrroobblleemmss - - - If you get UML to crash, hang, or otherwise misbehave, you should - report this on one of the project mailing lists, either the developer - list - user-mode-linux-devel at lists dot sourceforge dot net - (subscription info) or the user list - user-mode-linux-user at lists - dot sourceforge dot net (subscription info). When you do, it is - likely that I will want more information. So, it would be helpful to - read the stuff below, do whatever is applicable in your case, and - report the results to the list. - - - For any diagnosis, you're going to need to build a debugging kernel. - The binaries from this site aren't debuggable. If you haven't done - this before, read about ``Compiling the kernel and modules'' and - ``Kernel debugging'' UML first. - - - 1144..11.. CCaassee 11 :: NNoorrmmaall kkeerrnneell ppaanniiccss - - The most common case is for a normal thread to panic. To debug this, - you will need to run it under the debugger (add 'debug' to the command - line). An xterm will start up with gdb running inside it. Continue - it when it stops in start_kernel and make it crash. Now ^C gdb and - - - If the panic was a "Kernel mode fault", then there will be a segv - frame on the stack and I'm going to want some more information. The - stack might look something like this: - - - (UML gdb) backtrace - #0 0x1009bf76 in __sigprocmask (how=1, set=0x5f347940, oset=0x0) - at ../sysdeps/unix/sysv/linux/sigprocmask.c:49 - #1 0x10091411 in change_sig (signal=10, on=1) at process.c:218 - #2 0x10094785 in timer_handler (sig=26) at time_kern.c:32 - #3 0x1009bf38 in __restore () - at ../sysdeps/unix/sysv/linux/i386/sigaction.c:125 - #4 0x1009534c in segv (address=8, ip=268849158, is_write=2, is_user=0) - at trap_kern.c:66 - #5 0x10095c04 in segv_handler (sig=11) at trap_user.c:285 - #6 0x1009bf38 in __restore () - - - - - I'm going to want to see the symbol and line information for the value - of ip in the segv frame. In this case, you would do the following: - - - (UML gdb) i sym 268849158 - - - - - and - - - (UML gdb) i line *268849158 - - - - - The reason for this is the __restore frame right above the segv_han- - dler frame is hiding the frame that actually segfaulted. So, I have - to get that information from the faulting ip. - - - 1144..22.. CCaassee 22 :: TTrraacciinngg tthhrreeaadd ppaanniiccss - - The less common and more painful case is when the tracing thread - panics. In this case, the kernel debugger will be useless because it - needs a healthy tracing thread in order to work. The first thing to - do is get a backtrace from the tracing thread. This is done by - figuring out what its pid is, firing up gdb, and attaching it to that - pid. You can figure out the tracing thread pid by looking at the - first line of the console output, which will look like this: - - - tracing thread pid = 15851 - - - - - or by running ps on the host and finding the line that looks like - this: - - - jdike 15851 4.5 0.4 132568 1104 pts/0 S 21:34 0:05 ./linux [(tracing thread)] - - - - - If the panic was 'segfault in signals', then follow the instructions - above for collecting information about the location of the seg fault. - - - If the tracing thread flaked out all by itself, then send that - backtrace in and wait for our crack debugging team to fix the problem. - - - 1144..33.. CCaassee 33 :: TTrraacciinngg tthhrreeaadd ppaanniiccss ccaauusseedd bbyy ootthheerr tthhrreeaaddss - - However, there are cases where the misbehavior of another thread - caused the problem. The most common panic of this type is: - - - wait_for_stop failed to wait for to stop with - - - - - In this case, you'll need to get a backtrace from the process men- - tioned in the panic, which is complicated by the fact that the kernel - debugger is defunct and without some fancy footwork, another gdb can't - attach to it. So, this is how the fancy footwork goes: - - In a shell: - - - host% kill -STOP pid - - - - - Run gdb on the tracing thread as described in case 2 and do: - - - (host gdb) call detach(pid) - - - If you get a segfault, do it again. It always works the second time. - - Detach from the tracing thread and attach to that other thread: - - - (host gdb) detach - - - - - - - (host gdb) attach pid - - - - - If gdb hangs when attaching to that process, go back to a shell and - do: - - - host% - kill -CONT pid - - - - - And then get the backtrace: - - - (host gdb) backtrace - - - - - - 1144..44.. CCaassee 44 :: HHaannggss - - Hangs seem to be fairly rare, but they sometimes happen. When a hang - happens, we need a backtrace from the offending process. Run the - kernel debugger as described in case 1 and get a backtrace. If the - current process is not the idle thread, then send in the backtrace. - You can tell that it's the idle thread if the stack looks like this: - - - #0 0x100b1401 in __libc_nanosleep () - #1 0x100a2885 in idle_sleep (secs=10) at time.c:122 - #2 0x100a546f in do_idle () at process_kern.c:445 - #3 0x100a5508 in cpu_idle () at process_kern.c:471 - #4 0x100ec18f in start_kernel () at init/main.c:592 - #5 0x100a3e10 in start_kernel_proc (unused=0x0) at um_arch.c:71 - #6 0x100a383f in signal_tramp (arg=0x100a3dd8) at trap_user.c:50 - - - - - If this is the case, then some other process is at fault, and went to - sleep when it shouldn't have. Run ps on the host and figure out which - process should not have gone to sleep and stayed asleep. Then attach - to it with gdb and get a backtrace as described in case 3. - - - - - - - 1155.. TThhaannkkss - - - A number of people have helped this project in various ways, and this - page gives recognition where recognition is due. - - - If you're listed here and you would prefer a real link on your name, - or no link at all, instead of the despammed email address pseudo-link, - let me know. - - - If you're not listed here and you think maybe you should be, please - let me know that as well. I try to get everyone, but sometimes my - bookkeeping lapses and I forget about contributions. - - - 1155..11.. CCooddee aanndd DDooccuummeennttaattiioonn - - Rusty Russell - - - +o wrote the HOWTO - - +o prodded me into making this project official and putting it on - SourceForge - - +o came up with the way cool UML logo - - +o redid the config process - - - Peter Moulder - Fixed my config and build - processes, and added some useful code to the block driver - - - Bill Stearns - - - +o HOWTO updates - - +o lots of bug reports - - +o lots of testing - - +o dedicated a box (uml.ists.dartmouth.edu) to support UML development - - +o wrote the mkrootfs script, which allows bootable filesystems of - RPM-based distributions to be cranked out - - +o cranked out a large number of filesystems with said script - - - Jim Leu - Wrote the virtual ethernet driver - and associated usermode tools - - Lars Brinkhoff - Contributed the ptrace - proxy from his own project to allow easier - kernel debugging - - - Andrea Arcangeli - Redid some of the early boot - code so that it would work on machines with Large File Support - - - Chris Emerson - Did - the first UML port to Linux/ppc - - - Harald Welte - Wrote the multicast - transport for the network driver - - - Jorgen Cederlof - Added special file support to hostfs - - - Greg Lonnon - Changed the ubd driver - to allow it to layer a COW file on a shared read-only filesystem and - wrote the iomem emulation support - - - Henrik Nordstrom - Provided a variety - of patches, fixes, and clues - - - Lennert Buytenhek - Contributed various patches, a rewrite of the - network driver, the first implementation of the mconsole driver, and - did the bulk of the work needed to get SMP working again. - - - Yon Uriarte - Fixed the TUN/TAP network backend while I slept. - - - Adam Heath - Made a bunch of nice cleanups to the initialization code, - plus various other small patches. - - - Matt Zimmerman - Matt volunteered to be the UML Debian maintainer and - is doing a real nice job of it. He also noticed and fixed a number of - actually and potentially exploitable security holes in uml_net. Plus - the occasional patch. I like patches. - - - James McMechan - James seems to have taken over maintenance of the ubd - driver and is doing a nice job of it. - - - Chandan Kudige - wrote the umlgdb script which automates the reloading - of module symbols. - - - Steve Schmidtke - wrote the UML slirp transport and hostaudio drivers, - enabling UML processes to access audio devices on the host. He also - submitted patches for the slip transport and lots of other things. - - - David Coulson - - - +o Set up the usermodelinux.org site, - which is a great way of keeping the UML user community on top of - UML goings-on. - - +o Site documentation and updates - - +o Nifty little UML management daemon UMLd - - - +o Lots of testing and bug reports - - - - - 1155..22.. FFlluusshhiinngg oouutt bbuuggss - - - - +o Yuri Pudgorodsky - - +o Gerald Britton - - +o Ian Wehrman - - +o Gord Lamb - - +o Eugene Koontz - - +o John H. Hartman - - +o Anders Karlsson - - +o Daniel Phillips - - +o John Fremlin - - +o Rainer Burgstaller - - +o James Stevenson - - +o Matt Clay - - +o Cliff Jefferies - - +o Geoff Hoff - - +o Lennert Buytenhek - - +o Al Viro - - +o Frank Klingenhoefer - - +o Livio Baldini Soares - - +o Jon Burgess - - +o Petru Paler - - +o Paul - - +o Chris Reahard - - +o Sverker Nilsson - - +o Gong Su - - +o johan verrept - - +o Bjorn Eriksson - - +o Lorenzo Allegrucci - - +o Muli Ben-Yehuda - - +o David Mansfield - - +o Howard Goff - - +o Mike Anderson - - +o John Byrne - - +o Sapan J. Batia - - +o Iris Huang - - +o Jan Hudec - - +o Voluspa - - - - - 1155..33.. BBuugglleettss aanndd cclleeaann--uuppss - - - - +o Dave Zarzycki - - +o Adam Lazur - - +o Boria Feigin - - +o Brian J. Murrell - - +o JS - - +o Roman Zippel - - +o Wil Cooley - - +o Ayelet Shemesh - - +o Will Dyson - - +o Sverker Nilsson - - +o dvorak - - +o v.naga srinivas - - +o Shlomi Fish - - +o Roger Binns - - +o johan verrept - - +o MrChuoi - - +o Peter Cleve - - +o Vincent Guffens - - +o Nathan Scott - - +o Patrick Caulfield - - +o jbearce - - +o Catalin Marinas - - +o Shane Spencer - - +o Zou Min - - - +o Ryan Boder - - +o Lorenzo Colitti - - +o Gwendal Grignou - - +o Andre' Breiler - - +o Tsutomu Yasuda - - - - 1155..44.. CCaassee SSttuuddiieess - - - +o Jon Wright - - +o William McEwan - - +o Michael Richardson - - - - 1155..55.. OOtthheerr ccoonnttrriibbuuttiioonnss - - - Bill Carr made the Red Hat mkrootfs script - work with RH 6.2. - - Michael Jennings sent in some material which - is now gracing the top of the index page of this site. - - SGI (and more specifically Ralf Baechle ) gave me an account on oss.sgi.com - . The bandwidth there made it possible to - produce most of the filesystems available on the project download - page. - - Laurent Bonnaud took the old grotty - Debian filesystem that I've been distributing and updated it to 2.2. - It is now available by itself here. - - Rik van Riel gave me some ftp space on ftp.nl.linux.org so I can make - releases even when Sourceforge is broken. - - Rodrigo de Castro looked at my broken pte code and told me what was - wrong with it, letting me fix a long-standing (several weeks) and - serious set of bugs. - - Chris Reahard built a specialized root filesystem for running a DNS - server jailed inside UML. It's available from the download - page in the Jail - Filesystems section. - - - - - - - - - - - - diff --git a/Documentation/usb/callbacks.txt b/Documentation/usb/callbacks.txt index bfb36b34b79e..9e85846bdb98 100644 --- a/Documentation/usb/callbacks.txt +++ b/Documentation/usb/callbacks.txt @@ -95,9 +95,11 @@ pre_reset int (*pre_reset)(struct usb_interface *intf); -Another driver or user space is triggering a reset on the device which -contains the interface passed as an argument. Cease IO and save any -device state you need to restore. +A driver or user space is triggering a reset on the device which +contains the interface passed as an argument. Cease IO, wait for all +outstanding URBs to complete, and save any device state you need to +restore. No more URBs may be submitted until the post_reset method +is called. If you need to allocate memory here, use GFP_NOIO or GFP_ATOMIC, if you are in atomic context. diff --git a/Documentation/usb/linux-cdc-acm.inf b/Documentation/usb/linux-cdc-acm.inf index 612e7220fb29..37a02ce54841 100644 --- a/Documentation/usb/linux-cdc-acm.inf +++ b/Documentation/usb/linux-cdc-acm.inf @@ -90,10 +90,10 @@ ServiceBinary=%12%\USBSER.sys [SourceDisksFiles] [SourceDisksNames] [DeviceList] -%DESCRIPTION%=DriverInstall, USB\VID_0525&PID_A4A7, USB\VID_0525&PID_A4AB&MI_02 +%DESCRIPTION%=DriverInstall, USB\VID_0525&PID_A4A7, USB\VID_1D6B&PID_0104&MI_02 [DeviceList.NTamd64] -%DESCRIPTION%=DriverInstall, USB\VID_0525&PID_A4A7, USB\VID_0525&PID_A4AB&MI_02 +%DESCRIPTION%=DriverInstall, USB\VID_0525&PID_A4A7, USB\VID_1D6B&PID_0104&MI_02 ;------------------------------------------------------------------------------ diff --git a/Documentation/usb/linux.inf b/Documentation/usb/linux.inf index 4dee95851224..4ffa715b0ae8 100644 --- a/Documentation/usb/linux.inf +++ b/Documentation/usb/linux.inf @@ -18,15 +18,15 @@ DriverVer = 06/21/2006,6.0.6000.16384 ; Decoration for x86 architecture [LinuxDevices.NTx86] -%LinuxDevice% = RNDIS.NT.5.1, USB\VID_0525&PID_a4a2, USB\VID_0525&PID_a4ab&MI_00 +%LinuxDevice% = RNDIS.NT.5.1, USB\VID_0525&PID_a4a2, USB\VID_1d6b&PID_0104&MI_00 ; Decoration for x64 architecture [LinuxDevices.NTamd64] -%LinuxDevice% = RNDIS.NT.5.1, USB\VID_0525&PID_a4a2, USB\VID_0525&PID_a4ab&MI_00 +%LinuxDevice% = RNDIS.NT.5.1, USB\VID_0525&PID_a4a2, USB\VID_1d6b&PID_0104&MI_00 ; Decoration for ia64 architecture [LinuxDevices.NTia64] -%LinuxDevice% = RNDIS.NT.5.1, USB\VID_0525&PID_a4a2, USB\VID_0525&PID_a4ab&MI_00 +%LinuxDevice% = RNDIS.NT.5.1, USB\VID_0525&PID_a4a2, USB\VID_1d6b&PID_0104&MI_00 ;@@@ This is the common setting for setup [ControlFlags] diff --git a/Documentation/vgaarbiter.txt b/Documentation/vgaarbiter.txt index 43a9b0694fdd..b7d401e0eae9 100644 --- a/Documentation/vgaarbiter.txt +++ b/Documentation/vgaarbiter.txt @@ -14,11 +14,10 @@ the legacy VGA arbitration task (besides other bus management tasks) when more than one legacy device co-exists on the same machine. But the problem happens when these devices are trying to be accessed by different userspace clients (e.g. two server in parallel). Their address assignments conflict. Moreover, -ideally, being an userspace application, it is not the role of the the X -server to control bus resources. Therefore an arbitration scheme outside of -the X server is needed to control the sharing of these resources. This -document introduces the operation of the VGA arbiter implemented for Linux -kernel. +ideally, being a userspace application, it is not the role of the X server to +control bus resources. Therefore an arbitration scheme outside of the X server +is needed to control the sharing of these resources. This document introduces +the operation of the VGA arbiter implemented for the Linux kernel. ---------------------------------------------------------------------------- @@ -39,7 +38,7 @@ I.1 vgaarb The vgaarb is a module of the Linux Kernel. When it is initially loaded, it scans all PCI devices and adds the VGA ones inside the arbitration. The arbiter then enables/disables the decoding on different devices of the VGA -legacy instructions. Device which do not want/need to use the arbiter may +legacy instructions. Devices which do not want/need to use the arbiter may explicitly tell it by calling vga_set_legacy_decoding(). The kernel exports a char device interface (/dev/vga_arbiter) to the clients, @@ -95,8 +94,8 @@ In the case of devices hot-{un,}plugged, there is a hook - pci_notify() - to notify them being added/removed in the system and automatically added/removed in the arbiter. -There's also a in-kernel API of the arbiter in the case of DRM, vgacon and -others which may use the arbiter. +There is also an in-kernel API of the arbiter in case DRM, vgacon, or other +drivers want to use it. I.2 libpciaccess @@ -117,9 +116,8 @@ Besides it, in pci_system were added: struct pci_device *vga_default_dev; -The vga_count is usually need to keep informed how many cards are being -arbitrated, so for instance if there's only one then it can totally escape the -scheme. +The vga_count is used to track how many cards are being arbitrated, so for +instance, if there is only one card, then it can completely escape arbitration. These functions below acquire VGA resources for the given card and mark those diff --git a/Documentation/video4linux/CARDLIST.em28xx b/Documentation/video4linux/CARDLIST.em28xx index 31b485723bc5..9aae449440dc 100644 --- a/Documentation/video4linux/CARDLIST.em28xx +++ b/Documentation/video4linux/CARDLIST.em28xx @@ -54,7 +54,7 @@ 53 -> Pinnacle Hybrid Pro (em2881) 54 -> Kworld VS-DVB-T 323UR (em2882) [eb1a:e323] 55 -> Terratec Cinnergy Hybrid T USB XS (em2882) (em2882) [0ccd:005e,0ccd:0042] - 56 -> Pinnacle Hybrid Pro (2) (em2882) [2304:0226] + 56 -> Pinnacle Hybrid Pro (330e) (em2882) [2304:0226] 57 -> Kworld PlusTV HD Hybrid 330 (em2883) [eb1a:a316] 58 -> Compro VideoMate ForYou/Stereo (em2820/em2840) [185b:2041] 60 -> Hauppauge WinTV HVR 850 (em2883) [2040:651f] diff --git a/Documentation/video4linux/Zoran b/Documentation/video4linux/Zoran index c40e3bab08fa..9ed629d4874b 100644 --- a/Documentation/video4linux/Zoran +++ b/Documentation/video4linux/Zoran @@ -130,7 +130,6 @@ Card number: 4 Note: No module for the mse3000 is available yet Note: No module for the vpx3224 is available yet -Note: use encoder=X or decoder=X for non-default i2c chips =========================== diff --git a/Documentation/video4linux/gspca.txt b/Documentation/video4linux/gspca.txt index 5c542e60f51d..5bfa9a777d26 100644 --- a/Documentation/video4linux/gspca.txt +++ b/Documentation/video4linux/gspca.txt @@ -275,6 +275,7 @@ pac7302 093a:2629 Genious iSlim 300 pac7302 093a:262a Webcam 300k pac7302 093a:262c Philips SPC 230 NC jeilinj 0979:0280 Sakar 57379 +jeilinj 0979:0280 Sportscam DV15 zc3xx 0ac8:0302 Z-star Vimicro zc0302 vc032x 0ac8:0321 Vimicro generic vc0321 vc032x 0ac8:0323 Vimicro Vc0323 diff --git a/Documentation/video4linux/sh_mobile_ceu_camera.txt b/Documentation/video4linux/sh_mobile_ceu_camera.txt index cb47e723af74..1e96ce6e2d2f 100644 --- a/Documentation/video4linux/sh_mobile_ceu_camera.txt +++ b/Documentation/video4linux/sh_mobile_ceu_camera.txt @@ -37,7 +37,7 @@ Generic scaling / cropping scheme -1'- In the above chart minuses and slashes represent "real" data amounts, points and -accents represent "useful" data, basically, CEU scaled amd cropped output, +accents represent "useful" data, basically, CEU scaled and cropped output, mapped back onto the client's source plane. Such a configuration can be produced by user requests: @@ -65,7 +65,7 @@ Do not touch input rectangle - it is already optimal. 1. Calculate current sensor scales: - scale_s = ((3') - (3)) / ((2') - (2)) + scale_s = ((2') - (2)) / ((3') - (3)) 2. Calculate "effective" input crop (sensor subwindow) - CEU crop scaled back at current sensor scales onto input window - this is user S_CROP: @@ -80,7 +80,7 @@ window: 4. Calculate sensor output window by applying combined scales to real input window: - width_s_out = ((2') - (2)) / scale_comb + width_s_out = ((7') - (7)) = ((2') - (2)) / scale_comb 5. Apply iterative sensor S_FMT for sensor output window. diff --git a/Documentation/video4linux/uvcvideo.txt b/Documentation/video4linux/uvcvideo.txt new file mode 100644 index 000000000000..848d620dcc5c --- /dev/null +++ b/Documentation/video4linux/uvcvideo.txt @@ -0,0 +1,239 @@ +Linux USB Video Class (UVC) driver +================================== + +This file documents some driver-specific aspects of the UVC driver, such as +driver-specific ioctls and implementation notes. + +Questions and remarks can be sent to the Linux UVC development mailing list at +linux-uvc-devel@lists.berlios.de. + + +Extension Unit (XU) support +--------------------------- + +1. Introduction + +The UVC specification allows for vendor-specific extensions through extension +units (XUs). The Linux UVC driver supports extension unit controls (XU controls) +through two separate mechanisms: + + - through mappings of XU controls to V4L2 controls + - through a driver-specific ioctl interface + +The first one allows generic V4L2 applications to use XU controls by mapping +certain XU controls onto V4L2 controls, which then show up during ordinary +control enumeration. + +The second mechanism requires uvcvideo-specific knowledge for the application to +access XU controls but exposes the entire UVC XU concept to user space for +maximum flexibility. + +Both mechanisms complement each other and are described in more detail below. + + +2. Control mappings + +The UVC driver provides an API for user space applications to define so-called +control mappings at runtime. These allow for individual XU controls or byte +ranges thereof to be mapped to new V4L2 controls. Such controls appear and +function exactly like normal V4L2 controls (i.e. the stock controls, such as +brightness, contrast, etc.). However, reading or writing of such a V4L2 controls +triggers a read or write of the associated XU control. + +The ioctl used to create these control mappings is called UVCIOC_CTRL_MAP. +Previous driver versions (before 0.2.0) required another ioctl to be used +beforehand (UVCIOC_CTRL_ADD) to pass XU control information to the UVC driver. +This is no longer necessary as newer uvcvideo versions query the information +directly from the device. + +For details on the UVCIOC_CTRL_MAP ioctl please refer to the section titled +"IOCTL reference" below. + + +3. Driver specific XU control interface + +For applications that need to access XU controls directly, e.g. for testing +purposes, firmware upload, or accessing binary controls, a second mechanism to +access XU controls is provided in the form of a driver-specific ioctl, namely +UVCIOC_CTRL_QUERY. + +A call to this ioctl allows applications to send queries to the UVC driver that +directly map to the low-level UVC control requests. + +In order to make such a request the UVC unit ID of the control's extension unit +and the control selector need to be known. This information either needs to be +hardcoded in the application or queried using other ways such as by parsing the +UVC descriptor or, if available, using the media controller API to enumerate a +device's entities. + +Unless the control size is already known it is necessary to first make a +UVC_GET_LEN requests in order to be able to allocate a sufficiently large buffer +and set the buffer size to the correct value. Similarly, to find out whether +UVC_GET_CUR or UVC_SET_CUR are valid requests for a given control, a +UVC_GET_INFO request should be made. The bits 0 (GET supported) and 1 (SET +supported) of the resulting byte indicate which requests are valid. + +With the addition of the UVCIOC_CTRL_QUERY ioctl the UVCIOC_CTRL_GET and +UVCIOC_CTRL_SET ioctls have become obsolete since their functionality is a +subset of the former ioctl. For the time being they are still supported but +application developers are encouraged to use UVCIOC_CTRL_QUERY instead. + +For details on the UVCIOC_CTRL_QUERY ioctl please refer to the section titled +"IOCTL reference" below. + + +4. Security + +The API doesn't currently provide a fine-grained access control facility. The +UVCIOC_CTRL_ADD and UVCIOC_CTRL_MAP ioctls require super user permissions. + +Suggestions on how to improve this are welcome. + + +5. Debugging + +In order to debug problems related to XU controls or controls in general it is +recommended to enable the UVC_TRACE_CONTROL bit in the module parameter 'trace'. +This causes extra output to be written into the system log. + + +6. IOCTL reference + +---- UVCIOC_CTRL_MAP - Map a UVC control to a V4L2 control ---- + +Argument: struct uvc_xu_control_mapping + +Description: + This ioctl creates a mapping between a UVC control or part of a UVC + control and a V4L2 control. Once mappings are defined, userspace + applications can access vendor-defined UVC control through the V4L2 + control API. + + To create a mapping, applications fill the uvc_xu_control_mapping + structure with information about an existing UVC control defined with + UVCIOC_CTRL_ADD and a new V4L2 control. + + A UVC control can be mapped to several V4L2 controls. For instance, + a UVC pan/tilt control could be mapped to separate pan and tilt V4L2 + controls. The UVC control is divided into non overlapping fields using + the 'size' and 'offset' fields and are then independantly mapped to + V4L2 control. + + For signed integer V4L2 controls the data_type field should be set to + UVC_CTRL_DATA_TYPE_SIGNED. Other values are currently ignored. + +Return value: + On success 0 is returned. On error -1 is returned and errno is set + appropriately. + + ENOMEM + Not enough memory to perform the operation. + EPERM + Insufficient privileges (super user privileges are required). + EINVAL + No such UVC control. + EOVERFLOW + The requested offset and size would overflow the UVC control. + EEXIST + Mapping already exists. + +Data types: + * struct uvc_xu_control_mapping + + __u32 id V4L2 control identifier + __u8 name[32] V4L2 control name + __u8 entity[16] UVC extension unit GUID + __u8 selector UVC control selector + __u8 size V4L2 control size (in bits) + __u8 offset V4L2 control offset (in bits) + enum v4l2_ctrl_type + v4l2_type V4L2 control type + enum uvc_control_data_type + data_type UVC control data type + struct uvc_menu_info + *menu_info Array of menu entries (for menu controls only) + __u32 menu_count Number of menu entries (for menu controls only) + + * struct uvc_menu_info + + __u32 value Menu entry value used by the device + __u8 name[32] Menu entry name + + + * enum uvc_control_data_type + + UVC_CTRL_DATA_TYPE_RAW Raw control (byte array) + UVC_CTRL_DATA_TYPE_SIGNED Signed integer + UVC_CTRL_DATA_TYPE_UNSIGNED Unsigned integer + UVC_CTRL_DATA_TYPE_BOOLEAN Boolean + UVC_CTRL_DATA_TYPE_ENUM Enumeration + UVC_CTRL_DATA_TYPE_BITMASK Bitmask + + +---- UVCIOC_CTRL_QUERY - Query a UVC XU control ---- + +Argument: struct uvc_xu_control_query + +Description: + This ioctl queries a UVC XU control identified by its extension unit ID + and control selector. + + There are a number of different queries available that closely + correspond to the low-level control requests described in the UVC + specification. These requests are: + + UVC_GET_CUR + Obtain the current value of the control. + UVC_GET_MIN + Obtain the minimum value of the control. + UVC_GET_MAX + Obtain the maximum value of the control. + UVC_GET_DEF + Obtain the default value of the control. + UVC_GET_RES + Query the resolution of the control, i.e. the step size of the + allowed control values. + UVC_GET_LEN + Query the size of the control in bytes. + UVC_GET_INFO + Query the control information bitmap, which indicates whether + get/set requests are supported. + UVC_SET_CUR + Update the value of the control. + + Applications must set the 'size' field to the correct length for the + control. Exceptions are the UVC_GET_LEN and UVC_GET_INFO queries, for + which the size must be set to 2 and 1, respectively. The 'data' field + must point to a valid writable buffer big enough to hold the indicated + number of data bytes. + + Data is copied directly from the device without any driver-side + processing. Applications are responsible for data buffer formatting, + including little-endian/big-endian conversion. This is particularly + important for the result of the UVC_GET_LEN requests, which is always + returned as a little-endian 16-bit integer by the device. + +Return value: + On success 0 is returned. On error -1 is returned and errno is set + appropriately. + + ENOENT + The device does not support the given control or the specified + extension unit could not be found. + ENOBUFS + The specified buffer size is incorrect (too big or too small). + EINVAL + An invalid request code was passed. + EBADRQC + The given request is not supported by the given control. + EFAULT + The data pointer references an inaccessible memory area. + +Data types: + * struct uvc_xu_control_query + + __u8 unit Extension unit ID + __u8 selector Control selector + __u8 query Request code to send to the device + __u16 size Control data size (in bytes) + __u8 *data Control value diff --git a/Documentation/virtual/00-INDEX b/Documentation/virtual/00-INDEX new file mode 100644 index 000000000000..fe0251c4cfb7 --- /dev/null +++ b/Documentation/virtual/00-INDEX @@ -0,0 +1,10 @@ +Virtualization support in the Linux kernel. + +00-INDEX + - this file. +kvm/ + - Kernel Virtual Machine. See also http://linux-kvm.org +lguest/ + - Extremely simple hypervisor for experimental/educational use. +uml/ + - User Mode Linux, builds/runs Linux kernel as a userspace program. diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt new file mode 100644 index 000000000000..42542eb802ca --- /dev/null +++ b/Documentation/virtual/kvm/api.txt @@ -0,0 +1,1479 @@ +The Definitive KVM (Kernel-based Virtual Machine) API Documentation +=================================================================== + +1. General description + +The kvm API is a set of ioctls that are issued to control various aspects +of a virtual machine. The ioctls belong to three classes + + - System ioctls: These query and set global attributes which affect the + whole kvm subsystem. In addition a system ioctl is used to create + virtual machines + + - VM ioctls: These query and set attributes that affect an entire virtual + machine, for example memory layout. In addition a VM ioctl is used to + create virtual cpus (vcpus). + + Only run VM ioctls from the same process (address space) that was used + to create the VM. + + - vcpu ioctls: These query and set attributes that control the operation + of a single virtual cpu. + + Only run vcpu ioctls from the same thread that was used to create the + vcpu. + +2. File descriptors + +The kvm API is centered around file descriptors. An initial +open("/dev/kvm") obtains a handle to the kvm subsystem; this handle +can be used to issue system ioctls. A KVM_CREATE_VM ioctl on this +handle will create a VM file descriptor which can be used to issue VM +ioctls. A KVM_CREATE_VCPU ioctl on a VM fd will create a virtual cpu +and return a file descriptor pointing to it. Finally, ioctls on a vcpu +fd can be used to control the vcpu, including the important task of +actually running guest code. + +In general file descriptors can be migrated among processes by means +of fork() and the SCM_RIGHTS facility of unix domain socket. These +kinds of tricks are explicitly not supported by kvm. While they will +not cause harm to the host, their actual behavior is not guaranteed by +the API. The only supported use is one virtual machine per process, +and one vcpu per thread. + +3. Extensions + +As of Linux 2.6.22, the KVM ABI has been stabilized: no backward +incompatible change are allowed. However, there is an extension +facility that allows backward-compatible extensions to the API to be +queried and used. + +The extension mechanism is not based on on the Linux version number. +Instead, kvm defines extension identifiers and a facility to query +whether a particular extension identifier is available. If it is, a +set of ioctls is available for application use. + +4. API description + +This section describes ioctls that can be used to control kvm guests. +For each ioctl, the following information is provided along with a +description: + + Capability: which KVM extension provides this ioctl. Can be 'basic', + which means that is will be provided by any kernel that supports + API version 12 (see section 4.1), or a KVM_CAP_xyz constant, which + means availability needs to be checked with KVM_CHECK_EXTENSION + (see section 4.4). + + Architectures: which instruction set architectures provide this ioctl. + x86 includes both i386 and x86_64. + + Type: system, vm, or vcpu. + + Parameters: what parameters are accepted by the ioctl. + + Returns: the return value. General error numbers (EBADF, ENOMEM, EINVAL) + are not detailed, but errors with specific meanings are. + +4.1 KVM_GET_API_VERSION + +Capability: basic +Architectures: all +Type: system ioctl +Parameters: none +Returns: the constant KVM_API_VERSION (=12) + +This identifies the API version as the stable kvm API. It is not +expected that this number will change. However, Linux 2.6.20 and +2.6.21 report earlier versions; these are not documented and not +supported. Applications should refuse to run if KVM_GET_API_VERSION +returns a value other than 12. If this check passes, all ioctls +described as 'basic' will be available. + +4.2 KVM_CREATE_VM + +Capability: basic +Architectures: all +Type: system ioctl +Parameters: none +Returns: a VM fd that can be used to control the new virtual machine. + +The new VM has no virtual cpus and no memory. An mmap() of a VM fd +will access the virtual machine's physical address space; offset zero +corresponds to guest physical address zero. Use of mmap() on a VM fd +is discouraged if userspace memory allocation (KVM_CAP_USER_MEMORY) is +available. + +4.3 KVM_GET_MSR_INDEX_LIST + +Capability: basic +Architectures: x86 +Type: system +Parameters: struct kvm_msr_list (in/out) +Returns: 0 on success; -1 on error +Errors: + E2BIG: the msr index list is to be to fit in the array specified by + the user. + +struct kvm_msr_list { + __u32 nmsrs; /* number of msrs in entries */ + __u32 indices[0]; +}; + +This ioctl returns the guest msrs that are supported. The list varies +by kvm version and host processor, but does not change otherwise. The +user fills in the size of the indices array in nmsrs, and in return +kvm adjusts nmsrs to reflect the actual number of msrs and fills in +the indices array with their numbers. + +Note: if kvm indicates supports MCE (KVM_CAP_MCE), then the MCE bank MSRs are +not returned in the MSR list, as different vcpus can have a different number +of banks, as set via the KVM_X86_SETUP_MCE ioctl. + +4.4 KVM_CHECK_EXTENSION + +Capability: basic +Architectures: all +Type: system ioctl +Parameters: extension identifier (KVM_CAP_*) +Returns: 0 if unsupported; 1 (or some other positive integer) if supported + +The API allows the application to query about extensions to the core +kvm API. Userspace passes an extension identifier (an integer) and +receives an integer that describes the extension availability. +Generally 0 means no and 1 means yes, but some extensions may report +additional information in the integer return value. + +4.5 KVM_GET_VCPU_MMAP_SIZE + +Capability: basic +Architectures: all +Type: system ioctl +Parameters: none +Returns: size of vcpu mmap area, in bytes + +The KVM_RUN ioctl (cf.) communicates with userspace via a shared +memory region. This ioctl returns the size of that region. See the +KVM_RUN documentation for details. + +4.6 KVM_SET_MEMORY_REGION + +Capability: basic +Architectures: all +Type: vm ioctl +Parameters: struct kvm_memory_region (in) +Returns: 0 on success, -1 on error + +This ioctl is obsolete and has been removed. + +4.7 KVM_CREATE_VCPU + +Capability: basic +Architectures: all +Type: vm ioctl +Parameters: vcpu id (apic id on x86) +Returns: vcpu fd on success, -1 on error + +This API adds a vcpu to a virtual machine. The vcpu id is a small integer +in the range [0, max_vcpus). You can use KVM_CAP_NR_VCPUS of the +KVM_CHECK_EXTENSION ioctl() to determine the value for max_vcpus at run-time. +If the KVM_CAP_NR_VCPUS does not exist, you should assume that max_vcpus is 4 +cpus max. + +4.8 KVM_GET_DIRTY_LOG (vm ioctl) + +Capability: basic +Architectures: x86 +Type: vm ioctl +Parameters: struct kvm_dirty_log (in/out) +Returns: 0 on success, -1 on error + +/* for KVM_GET_DIRTY_LOG */ +struct kvm_dirty_log { + __u32 slot; + __u32 padding; + union { + void __user *dirty_bitmap; /* one bit per page */ + __u64 padding; + }; +}; + +Given a memory slot, return a bitmap containing any pages dirtied +since the last call to this ioctl. Bit 0 is the first page in the +memory slot. Ensure the entire structure is cleared to avoid padding +issues. + +4.9 KVM_SET_MEMORY_ALIAS + +Capability: basic +Architectures: x86 +Type: vm ioctl +Parameters: struct kvm_memory_alias (in) +Returns: 0 (success), -1 (error) + +This ioctl is obsolete and has been removed. + +4.10 KVM_RUN + +Capability: basic +Architectures: all +Type: vcpu ioctl +Parameters: none +Returns: 0 on success, -1 on error +Errors: + EINTR: an unmasked signal is pending + +This ioctl is used to run a guest virtual cpu. While there are no +explicit parameters, there is an implicit parameter block that can be +obtained by mmap()ing the vcpu fd at offset 0, with the size given by +KVM_GET_VCPU_MMAP_SIZE. The parameter block is formatted as a 'struct +kvm_run' (see below). + +4.11 KVM_GET_REGS + +Capability: basic +Architectures: all +Type: vcpu ioctl +Parameters: struct kvm_regs (out) +Returns: 0 on success, -1 on error + +Reads the general purpose registers from the vcpu. + +/* x86 */ +struct kvm_regs { + /* out (KVM_GET_REGS) / in (KVM_SET_REGS) */ + __u64 rax, rbx, rcx, rdx; + __u64 rsi, rdi, rsp, rbp; + __u64 r8, r9, r10, r11; + __u64 r12, r13, r14, r15; + __u64 rip, rflags; +}; + +4.12 KVM_SET_REGS + +Capability: basic +Architectures: all +Type: vcpu ioctl +Parameters: struct kvm_regs (in) +Returns: 0 on success, -1 on error + +Writes the general purpose registers into the vcpu. + +See KVM_GET_REGS for the data structure. + +4.13 KVM_GET_SREGS + +Capability: basic +Architectures: x86, ppc +Type: vcpu ioctl +Parameters: struct kvm_sregs (out) +Returns: 0 on success, -1 on error + +Reads special registers from the vcpu. + +/* x86 */ +struct kvm_sregs { + struct kvm_segment cs, ds, es, fs, gs, ss; + struct kvm_segment tr, ldt; + struct kvm_dtable gdt, idt; + __u64 cr0, cr2, cr3, cr4, cr8; + __u64 efer; + __u64 apic_base; + __u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64]; +}; + +/* ppc -- see arch/powerpc/include/asm/kvm.h */ + +interrupt_bitmap is a bitmap of pending external interrupts. At most +one bit may be set. This interrupt has been acknowledged by the APIC +but not yet injected into the cpu core. + +4.14 KVM_SET_SREGS + +Capability: basic +Architectures: x86, ppc +Type: vcpu ioctl +Parameters: struct kvm_sregs (in) +Returns: 0 on success, -1 on error + +Writes special registers into the vcpu. See KVM_GET_SREGS for the +data structures. + +4.15 KVM_TRANSLATE + +Capability: basic +Architectures: x86 +Type: vcpu ioctl +Parameters: struct kvm_translation (in/out) +Returns: 0 on success, -1 on error + +Translates a virtual address according to the vcpu's current address +translation mode. + +struct kvm_translation { + /* in */ + __u64 linear_address; + + /* out */ + __u64 physical_address; + __u8 valid; + __u8 writeable; + __u8 usermode; + __u8 pad[5]; +}; + +4.16 KVM_INTERRUPT + +Capability: basic +Architectures: x86, ppc +Type: vcpu ioctl +Parameters: struct kvm_interrupt (in) +Returns: 0 on success, -1 on error + +Queues a hardware interrupt vector to be injected. This is only +useful if in-kernel local APIC or equivalent is not used. + +/* for KVM_INTERRUPT */ +struct kvm_interrupt { + /* in */ + __u32 irq; +}; + +X86: + +Note 'irq' is an interrupt vector, not an interrupt pin or line. + +PPC: + +Queues an external interrupt to be injected. This ioctl is overleaded +with 3 different irq values: + +a) KVM_INTERRUPT_SET + + This injects an edge type external interrupt into the guest once it's ready + to receive interrupts. When injected, the interrupt is done. + +b) KVM_INTERRUPT_UNSET + + This unsets any pending interrupt. + + Only available with KVM_CAP_PPC_UNSET_IRQ. + +c) KVM_INTERRUPT_SET_LEVEL + + This injects a level type external interrupt into the guest context. The + interrupt stays pending until a specific ioctl with KVM_INTERRUPT_UNSET + is triggered. + + Only available with KVM_CAP_PPC_IRQ_LEVEL. + +Note that any value for 'irq' other than the ones stated above is invalid +and incurs unexpected behavior. + +4.17 KVM_DEBUG_GUEST + +Capability: basic +Architectures: none +Type: vcpu ioctl +Parameters: none) +Returns: -1 on error + +Support for this has been removed. Use KVM_SET_GUEST_DEBUG instead. + +4.18 KVM_GET_MSRS + +Capability: basic +Architectures: x86 +Type: vcpu ioctl +Parameters: struct kvm_msrs (in/out) +Returns: 0 on success, -1 on error + +Reads model-specific registers from the vcpu. Supported msr indices can +be obtained using KVM_GET_MSR_INDEX_LIST. + +struct kvm_msrs { + __u32 nmsrs; /* number of msrs in entries */ + __u32 pad; + + struct kvm_msr_entry entries[0]; +}; + +struct kvm_msr_entry { + __u32 index; + __u32 reserved; + __u64 data; +}; + +Application code should set the 'nmsrs' member (which indicates the +size of the entries array) and the 'index' member of each array entry. +kvm will fill in the 'data' member. + +4.19 KVM_SET_MSRS + +Capability: basic +Architectures: x86 +Type: vcpu ioctl +Parameters: struct kvm_msrs (in) +Returns: 0 on success, -1 on error + +Writes model-specific registers to the vcpu. See KVM_GET_MSRS for the +data structures. + +Application code should set the 'nmsrs' member (which indicates the +size of the entries array), and the 'index' and 'data' members of each +array entry. + +4.20 KVM_SET_CPUID + +Capability: basic +Architectures: x86 +Type: vcpu ioctl +Parameters: struct kvm_cpuid (in) +Returns: 0 on success, -1 on error + +Defines the vcpu responses to the cpuid instruction. Applications +should use the KVM_SET_CPUID2 ioctl if available. + + +struct kvm_cpuid_entry { + __u32 function; + __u32 eax; + __u32 ebx; + __u32 ecx; + __u32 edx; + __u32 padding; +}; + +/* for KVM_SET_CPUID */ +struct kvm_cpuid { + __u32 nent; + __u32 padding; + struct kvm_cpuid_entry entries[0]; +}; + +4.21 KVM_SET_SIGNAL_MASK + +Capability: basic +Architectures: x86 +Type: vcpu ioctl +Parameters: struct kvm_signal_mask (in) +Returns: 0 on success, -1 on error + +Defines which signals are blocked during execution of KVM_RUN. This +signal mask temporarily overrides the threads signal mask. Any +unblocked signal received (except SIGKILL and SIGSTOP, which retain +their traditional behaviour) will cause KVM_RUN to return with -EINTR. + +Note the signal will only be delivered if not blocked by the original +signal mask. + +/* for KVM_SET_SIGNAL_MASK */ +struct kvm_signal_mask { + __u32 len; + __u8 sigset[0]; +}; + +4.22 KVM_GET_FPU + +Capability: basic +Architectures: x86 +Type: vcpu ioctl +Parameters: struct kvm_fpu (out) +Returns: 0 on success, -1 on error + +Reads the floating point state from the vcpu. + +/* for KVM_GET_FPU and KVM_SET_FPU */ +struct kvm_fpu { + __u8 fpr[8][16]; + __u16 fcw; + __u16 fsw; + __u8 ftwx; /* in fxsave format */ + __u8 pad1; + __u16 last_opcode; + __u64 last_ip; + __u64 last_dp; + __u8 xmm[16][16]; + __u32 mxcsr; + __u32 pad2; +}; + +4.23 KVM_SET_FPU + +Capability: basic +Architectures: x86 +Type: vcpu ioctl +Parameters: struct kvm_fpu (in) +Returns: 0 on success, -1 on error + +Writes the floating point state to the vcpu. + +/* for KVM_GET_FPU and KVM_SET_FPU */ +struct kvm_fpu { + __u8 fpr[8][16]; + __u16 fcw; + __u16 fsw; + __u8 ftwx; /* in fxsave format */ + __u8 pad1; + __u16 last_opcode; + __u64 last_ip; + __u64 last_dp; + __u8 xmm[16][16]; + __u32 mxcsr; + __u32 pad2; +}; + +4.24 KVM_CREATE_IRQCHIP + +Capability: KVM_CAP_IRQCHIP +Architectures: x86, ia64 +Type: vm ioctl +Parameters: none +Returns: 0 on success, -1 on error + +Creates an interrupt controller model in the kernel. On x86, creates a virtual +ioapic, a virtual PIC (two PICs, nested), and sets up future vcpus to have a +local APIC. IRQ routing for GSIs 0-15 is set to both PIC and IOAPIC; GSI 16-23 +only go to the IOAPIC. On ia64, a IOSAPIC is created. + +4.25 KVM_IRQ_LINE + +Capability: KVM_CAP_IRQCHIP +Architectures: x86, ia64 +Type: vm ioctl +Parameters: struct kvm_irq_level +Returns: 0 on success, -1 on error + +Sets the level of a GSI input to the interrupt controller model in the kernel. +Requires that an interrupt controller model has been previously created with +KVM_CREATE_IRQCHIP. Note that edge-triggered interrupts require the level +to be set to 1 and then back to 0. + +struct kvm_irq_level { + union { + __u32 irq; /* GSI */ + __s32 status; /* not used for KVM_IRQ_LEVEL */ + }; + __u32 level; /* 0 or 1 */ +}; + +4.26 KVM_GET_IRQCHIP + +Capability: KVM_CAP_IRQCHIP +Architectures: x86, ia64 +Type: vm ioctl +Parameters: struct kvm_irqchip (in/out) +Returns: 0 on success, -1 on error + +Reads the state of a kernel interrupt controller created with +KVM_CREATE_IRQCHIP into a buffer provided by the caller. + +struct kvm_irqchip { + __u32 chip_id; /* 0 = PIC1, 1 = PIC2, 2 = IOAPIC */ + __u32 pad; + union { + char dummy[512]; /* reserving space */ + struct kvm_pic_state pic; + struct kvm_ioapic_state ioapic; + } chip; +}; + +4.27 KVM_SET_IRQCHIP + +Capability: KVM_CAP_IRQCHIP +Architectures: x86, ia64 +Type: vm ioctl +Parameters: struct kvm_irqchip (in) +Returns: 0 on success, -1 on error + +Sets the state of a kernel interrupt controller created with +KVM_CREATE_IRQCHIP from a buffer provided by the caller. + +struct kvm_irqchip { + __u32 chip_id; /* 0 = PIC1, 1 = PIC2, 2 = IOAPIC */ + __u32 pad; + union { + char dummy[512]; /* reserving space */ + struct kvm_pic_state pic; + struct kvm_ioapic_state ioapic; + } chip; +}; + +4.28 KVM_XEN_HVM_CONFIG + +Capability: KVM_CAP_XEN_HVM +Architectures: x86 +Type: vm ioctl +Parameters: struct kvm_xen_hvm_config (in) +Returns: 0 on success, -1 on error + +Sets the MSR that the Xen HVM guest uses to initialize its hypercall +page, and provides the starting address and size of the hypercall +blobs in userspace. When the guest writes the MSR, kvm copies one +page of a blob (32- or 64-bit, depending on the vcpu mode) to guest +memory. + +struct kvm_xen_hvm_config { + __u32 flags; + __u32 msr; + __u64 blob_addr_32; + __u64 blob_addr_64; + __u8 blob_size_32; + __u8 blob_size_64; + __u8 pad2[30]; +}; + +4.29 KVM_GET_CLOCK + +Capability: KVM_CAP_ADJUST_CLOCK +Architectures: x86 +Type: vm ioctl +Parameters: struct kvm_clock_data (out) +Returns: 0 on success, -1 on error + +Gets the current timestamp of kvmclock as seen by the current guest. In +conjunction with KVM_SET_CLOCK, it is used to ensure monotonicity on scenarios +such as migration. + +struct kvm_clock_data { + __u64 clock; /* kvmclock current value */ + __u32 flags; + __u32 pad[9]; +}; + +4.30 KVM_SET_CLOCK + +Capability: KVM_CAP_ADJUST_CLOCK +Architectures: x86 +Type: vm ioctl +Parameters: struct kvm_clock_data (in) +Returns: 0 on success, -1 on error + +Sets the current timestamp of kvmclock to the value specified in its parameter. +In conjunction with KVM_GET_CLOCK, it is used to ensure monotonicity on scenarios +such as migration. + +struct kvm_clock_data { + __u64 clock; /* kvmclock current value */ + __u32 flags; + __u32 pad[9]; +}; + +4.31 KVM_GET_VCPU_EVENTS + +Capability: KVM_CAP_VCPU_EVENTS +Extended by: KVM_CAP_INTR_SHADOW +Architectures: x86 +Type: vm ioctl +Parameters: struct kvm_vcpu_event (out) +Returns: 0 on success, -1 on error + +Gets currently pending exceptions, interrupts, and NMIs as well as related +states of the vcpu. + +struct kvm_vcpu_events { + struct { + __u8 injected; + __u8 nr; + __u8 has_error_code; + __u8 pad; + __u32 error_code; + } exception; + struct { + __u8 injected; + __u8 nr; + __u8 soft; + __u8 shadow; + } interrupt; + struct { + __u8 injected; + __u8 pending; + __u8 masked; + __u8 pad; + } nmi; + __u32 sipi_vector; + __u32 flags; +}; + +KVM_VCPUEVENT_VALID_SHADOW may be set in the flags field to signal that +interrupt.shadow contains a valid state. Otherwise, this field is undefined. + +4.32 KVM_SET_VCPU_EVENTS + +Capability: KVM_CAP_VCPU_EVENTS +Extended by: KVM_CAP_INTR_SHADOW +Architectures: x86 +Type: vm ioctl +Parameters: struct kvm_vcpu_event (in) +Returns: 0 on success, -1 on error + +Set pending exceptions, interrupts, and NMIs as well as related states of the +vcpu. + +See KVM_GET_VCPU_EVENTS for the data structure. + +Fields that may be modified asynchronously by running VCPUs can be excluded +from the update. These fields are nmi.pending and sipi_vector. Keep the +corresponding bits in the flags field cleared to suppress overwriting the +current in-kernel state. The bits are: + +KVM_VCPUEVENT_VALID_NMI_PENDING - transfer nmi.pending to the kernel +KVM_VCPUEVENT_VALID_SIPI_VECTOR - transfer sipi_vector + +If KVM_CAP_INTR_SHADOW is available, KVM_VCPUEVENT_VALID_SHADOW can be set in +the flags field to signal that interrupt.shadow contains a valid state and +shall be written into the VCPU. + +4.33 KVM_GET_DEBUGREGS + +Capability: KVM_CAP_DEBUGREGS +Architectures: x86 +Type: vm ioctl +Parameters: struct kvm_debugregs (out) +Returns: 0 on success, -1 on error + +Reads debug registers from the vcpu. + +struct kvm_debugregs { + __u64 db[4]; + __u64 dr6; + __u64 dr7; + __u64 flags; + __u64 reserved[9]; +}; + +4.34 KVM_SET_DEBUGREGS + +Capability: KVM_CAP_DEBUGREGS +Architectures: x86 +Type: vm ioctl +Parameters: struct kvm_debugregs (in) +Returns: 0 on success, -1 on error + +Writes debug registers into the vcpu. + +See KVM_GET_DEBUGREGS for the data structure. The flags field is unused +yet and must be cleared on entry. + +4.35 KVM_SET_USER_MEMORY_REGION + +Capability: KVM_CAP_USER_MEM +Architectures: all +Type: vm ioctl +Parameters: struct kvm_userspace_memory_region (in) +Returns: 0 on success, -1 on error + +struct kvm_userspace_memory_region { + __u32 slot; + __u32 flags; + __u64 guest_phys_addr; + __u64 memory_size; /* bytes */ + __u64 userspace_addr; /* start of the userspace allocated memory */ +}; + +/* for kvm_memory_region::flags */ +#define KVM_MEM_LOG_DIRTY_PAGES 1UL + +This ioctl allows the user to create or modify a guest physical memory +slot. When changing an existing slot, it may be moved in the guest +physical memory space, or its flags may be modified. It may not be +resized. Slots may not overlap in guest physical address space. + +Memory for the region is taken starting at the address denoted by the +field userspace_addr, which must point at user addressable memory for +the entire memory slot size. Any object may back this memory, including +anonymous memory, ordinary files, and hugetlbfs. + +It is recommended that the lower 21 bits of guest_phys_addr and userspace_addr +be identical. This allows large pages in the guest to be backed by large +pages in the host. + +The flags field supports just one flag, KVM_MEM_LOG_DIRTY_PAGES, which +instructs kvm to keep track of writes to memory within the slot. See +the KVM_GET_DIRTY_LOG ioctl. + +When the KVM_CAP_SYNC_MMU capability, changes in the backing of the memory +region are automatically reflected into the guest. For example, an mmap() +that affects the region will be made visible immediately. Another example +is madvise(MADV_DROP). + +It is recommended to use this API instead of the KVM_SET_MEMORY_REGION ioctl. +The KVM_SET_MEMORY_REGION does not allow fine grained control over memory +allocation and is deprecated. + +4.36 KVM_SET_TSS_ADDR + +Capability: KVM_CAP_SET_TSS_ADDR +Architectures: x86 +Type: vm ioctl +Parameters: unsigned long tss_address (in) +Returns: 0 on success, -1 on error + +This ioctl defines the physical address of a three-page region in the guest +physical address space. The region must be within the first 4GB of the +guest physical address space and must not conflict with any memory slot +or any mmio address. The guest may malfunction if it accesses this memory +region. + +This ioctl is required on Intel-based hosts. This is needed on Intel hardware +because of a quirk in the virtualization implementation (see the internals +documentation when it pops into existence). + +4.37 KVM_ENABLE_CAP + +Capability: KVM_CAP_ENABLE_CAP +Architectures: ppc +Type: vcpu ioctl +Parameters: struct kvm_enable_cap (in) +Returns: 0 on success; -1 on error + ++Not all extensions are enabled by default. Using this ioctl the application +can enable an extension, making it available to the guest. + +On systems that do not support this ioctl, it always fails. On systems that +do support it, it only works for extensions that are supported for enablement. + +To check if a capability can be enabled, the KVM_CHECK_EXTENSION ioctl should +be used. + +struct kvm_enable_cap { + /* in */ + __u32 cap; + +The capability that is supposed to get enabled. + + __u32 flags; + +A bitfield indicating future enhancements. Has to be 0 for now. + + __u64 args[4]; + +Arguments for enabling a feature. If a feature needs initial values to +function properly, this is the place to put them. + + __u8 pad[64]; +}; + +4.38 KVM_GET_MP_STATE + +Capability: KVM_CAP_MP_STATE +Architectures: x86, ia64 +Type: vcpu ioctl +Parameters: struct kvm_mp_state (out) +Returns: 0 on success; -1 on error + +struct kvm_mp_state { + __u32 mp_state; +}; + +Returns the vcpu's current "multiprocessing state" (though also valid on +uniprocessor guests). + +Possible values are: + + - KVM_MP_STATE_RUNNABLE: the vcpu is currently running + - KVM_MP_STATE_UNINITIALIZED: the vcpu is an application processor (AP) + which has not yet received an INIT signal + - KVM_MP_STATE_INIT_RECEIVED: the vcpu has received an INIT signal, and is + now ready for a SIPI + - KVM_MP_STATE_HALTED: the vcpu has executed a HLT instruction and + is waiting for an interrupt + - KVM_MP_STATE_SIPI_RECEIVED: the vcpu has just received a SIPI (vector + accessible via KVM_GET_VCPU_EVENTS) + +This ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel +irqchip, the multiprocessing state must be maintained by userspace. + +4.39 KVM_SET_MP_STATE + +Capability: KVM_CAP_MP_STATE +Architectures: x86, ia64 +Type: vcpu ioctl +Parameters: struct kvm_mp_state (in) +Returns: 0 on success; -1 on error + +Sets the vcpu's current "multiprocessing state"; see KVM_GET_MP_STATE for +arguments. + +This ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel +irqchip, the multiprocessing state must be maintained by userspace. + +4.40 KVM_SET_IDENTITY_MAP_ADDR + +Capability: KVM_CAP_SET_IDENTITY_MAP_ADDR +Architectures: x86 +Type: vm ioctl +Parameters: unsigned long identity (in) +Returns: 0 on success, -1 on error + +This ioctl defines the physical address of a one-page region in the guest +physical address space. The region must be within the first 4GB of the +guest physical address space and must not conflict with any memory slot +or any mmio address. The guest may malfunction if it accesses this memory +region. + +This ioctl is required on Intel-based hosts. This is needed on Intel hardware +because of a quirk in the virtualization implementation (see the internals +documentation when it pops into existence). + +4.41 KVM_SET_BOOT_CPU_ID + +Capability: KVM_CAP_SET_BOOT_CPU_ID +Architectures: x86, ia64 +Type: vm ioctl +Parameters: unsigned long vcpu_id +Returns: 0 on success, -1 on error + +Define which vcpu is the Bootstrap Processor (BSP). Values are the same +as the vcpu id in KVM_CREATE_VCPU. If this ioctl is not called, the default +is vcpu 0. + +4.42 KVM_GET_XSAVE + +Capability: KVM_CAP_XSAVE +Architectures: x86 +Type: vcpu ioctl +Parameters: struct kvm_xsave (out) +Returns: 0 on success, -1 on error + +struct kvm_xsave { + __u32 region[1024]; +}; + +This ioctl would copy current vcpu's xsave struct to the userspace. + +4.43 KVM_SET_XSAVE + +Capability: KVM_CAP_XSAVE +Architectures: x86 +Type: vcpu ioctl +Parameters: struct kvm_xsave (in) +Returns: 0 on success, -1 on error + +struct kvm_xsave { + __u32 region[1024]; +}; + +This ioctl would copy userspace's xsave struct to the kernel. + +4.44 KVM_GET_XCRS + +Capability: KVM_CAP_XCRS +Architectures: x86 +Type: vcpu ioctl +Parameters: struct kvm_xcrs (out) +Returns: 0 on success, -1 on error + +struct kvm_xcr { + __u32 xcr; + __u32 reserved; + __u64 value; +}; + +struct kvm_xcrs { + __u32 nr_xcrs; + __u32 flags; + struct kvm_xcr xcrs[KVM_MAX_XCRS]; + __u64 padding[16]; +}; + +This ioctl would copy current vcpu's xcrs to the userspace. + +4.45 KVM_SET_XCRS + +Capability: KVM_CAP_XCRS +Architectures: x86 +Type: vcpu ioctl +Parameters: struct kvm_xcrs (in) +Returns: 0 on success, -1 on error + +struct kvm_xcr { + __u32 xcr; + __u32 reserved; + __u64 value; +}; + +struct kvm_xcrs { + __u32 nr_xcrs; + __u32 flags; + struct kvm_xcr xcrs[KVM_MAX_XCRS]; + __u64 padding[16]; +}; + +This ioctl would set vcpu's xcr to the value userspace specified. + +4.46 KVM_GET_SUPPORTED_CPUID + +Capability: KVM_CAP_EXT_CPUID +Architectures: x86 +Type: system ioctl +Parameters: struct kvm_cpuid2 (in/out) +Returns: 0 on success, -1 on error + +struct kvm_cpuid2 { + __u32 nent; + __u32 padding; + struct kvm_cpuid_entry2 entries[0]; +}; + +#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1 +#define KVM_CPUID_FLAG_STATEFUL_FUNC 2 +#define KVM_CPUID_FLAG_STATE_READ_NEXT 4 + +struct kvm_cpuid_entry2 { + __u32 function; + __u32 index; + __u32 flags; + __u32 eax; + __u32 ebx; + __u32 ecx; + __u32 edx; + __u32 padding[3]; +}; + +This ioctl returns x86 cpuid features which are supported by both the hardware +and kvm. Userspace can use the information returned by this ioctl to +construct cpuid information (for KVM_SET_CPUID2) that is consistent with +hardware, kernel, and userspace capabilities, and with user requirements (for +example, the user may wish to constrain cpuid to emulate older hardware, +or for feature consistency across a cluster). + +Userspace invokes KVM_GET_SUPPORTED_CPUID by passing a kvm_cpuid2 structure +with the 'nent' field indicating the number of entries in the variable-size +array 'entries'. If the number of entries is too low to describe the cpu +capabilities, an error (E2BIG) is returned. If the number is too high, +the 'nent' field is adjusted and an error (ENOMEM) is returned. If the +number is just right, the 'nent' field is adjusted to the number of valid +entries in the 'entries' array, which is then filled. + +The entries returned are the host cpuid as returned by the cpuid instruction, +with unknown or unsupported features masked out. Some features (for example, +x2apic), may not be present in the host cpu, but are exposed by kvm if it can +emulate them efficiently. The fields in each entry are defined as follows: + + function: the eax value used to obtain the entry + index: the ecx value used to obtain the entry (for entries that are + affected by ecx) + flags: an OR of zero or more of the following: + KVM_CPUID_FLAG_SIGNIFCANT_INDEX: + if the index field is valid + KVM_CPUID_FLAG_STATEFUL_FUNC: + if cpuid for this function returns different values for successive + invocations; there will be several entries with the same function, + all with this flag set + KVM_CPUID_FLAG_STATE_READ_NEXT: + for KVM_CPUID_FLAG_STATEFUL_FUNC entries, set if this entry is + the first entry to be read by a cpu + eax, ebx, ecx, edx: the values returned by the cpuid instruction for + this function/index combination + +4.47 KVM_PPC_GET_PVINFO + +Capability: KVM_CAP_PPC_GET_PVINFO +Architectures: ppc +Type: vm ioctl +Parameters: struct kvm_ppc_pvinfo (out) +Returns: 0 on success, !0 on error + +struct kvm_ppc_pvinfo { + __u32 flags; + __u32 hcall[4]; + __u8 pad[108]; +}; + +This ioctl fetches PV specific information that need to be passed to the guest +using the device tree or other means from vm context. + +For now the only implemented piece of information distributed here is an array +of 4 instructions that make up a hypercall. + +If any additional field gets added to this structure later on, a bit for that +additional piece of information will be set in the flags bitmap. + +4.48 KVM_ASSIGN_PCI_DEVICE + +Capability: KVM_CAP_DEVICE_ASSIGNMENT +Architectures: x86 ia64 +Type: vm ioctl +Parameters: struct kvm_assigned_pci_dev (in) +Returns: 0 on success, -1 on error + +Assigns a host PCI device to the VM. + +struct kvm_assigned_pci_dev { + __u32 assigned_dev_id; + __u32 busnr; + __u32 devfn; + __u32 flags; + __u32 segnr; + union { + __u32 reserved[11]; + }; +}; + +The PCI device is specified by the triple segnr, busnr, and devfn. +Identification in succeeding service requests is done via assigned_dev_id. The +following flags are specified: + +/* Depends on KVM_CAP_IOMMU */ +#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) + +4.49 KVM_DEASSIGN_PCI_DEVICE + +Capability: KVM_CAP_DEVICE_DEASSIGNMENT +Architectures: x86 ia64 +Type: vm ioctl +Parameters: struct kvm_assigned_pci_dev (in) +Returns: 0 on success, -1 on error + +Ends PCI device assignment, releasing all associated resources. + +See KVM_CAP_DEVICE_ASSIGNMENT for the data structure. Only assigned_dev_id is +used in kvm_assigned_pci_dev to identify the device. + +4.50 KVM_ASSIGN_DEV_IRQ + +Capability: KVM_CAP_ASSIGN_DEV_IRQ +Architectures: x86 ia64 +Type: vm ioctl +Parameters: struct kvm_assigned_irq (in) +Returns: 0 on success, -1 on error + +Assigns an IRQ to a passed-through device. + +struct kvm_assigned_irq { + __u32 assigned_dev_id; + __u32 host_irq; + __u32 guest_irq; + __u32 flags; + union { + struct { + __u32 addr_lo; + __u32 addr_hi; + __u32 data; + } guest_msi; + __u32 reserved[12]; + }; +}; + +The following flags are defined: + +#define KVM_DEV_IRQ_HOST_INTX (1 << 0) +#define KVM_DEV_IRQ_HOST_MSI (1 << 1) +#define KVM_DEV_IRQ_HOST_MSIX (1 << 2) + +#define KVM_DEV_IRQ_GUEST_INTX (1 << 8) +#define KVM_DEV_IRQ_GUEST_MSI (1 << 9) +#define KVM_DEV_IRQ_GUEST_MSIX (1 << 10) + +It is not valid to specify multiple types per host or guest IRQ. However, the +IRQ type of host and guest can differ or can even be null. + +4.51 KVM_DEASSIGN_DEV_IRQ + +Capability: KVM_CAP_ASSIGN_DEV_IRQ +Architectures: x86 ia64 +Type: vm ioctl +Parameters: struct kvm_assigned_irq (in) +Returns: 0 on success, -1 on error + +Ends an IRQ assignment to a passed-through device. + +See KVM_ASSIGN_DEV_IRQ for the data structure. The target device is specified +by assigned_dev_id, flags must correspond to the IRQ type specified on +KVM_ASSIGN_DEV_IRQ. Partial deassignment of host or guest IRQ is allowed. + +4.52 KVM_SET_GSI_ROUTING + +Capability: KVM_CAP_IRQ_ROUTING +Architectures: x86 ia64 +Type: vm ioctl +Parameters: struct kvm_irq_routing (in) +Returns: 0 on success, -1 on error + +Sets the GSI routing table entries, overwriting any previously set entries. + +struct kvm_irq_routing { + __u32 nr; + __u32 flags; + struct kvm_irq_routing_entry entries[0]; +}; + +No flags are specified so far, the corresponding field must be set to zero. + +struct kvm_irq_routing_entry { + __u32 gsi; + __u32 type; + __u32 flags; + __u32 pad; + union { + struct kvm_irq_routing_irqchip irqchip; + struct kvm_irq_routing_msi msi; + __u32 pad[8]; + } u; +}; + +/* gsi routing entry types */ +#define KVM_IRQ_ROUTING_IRQCHIP 1 +#define KVM_IRQ_ROUTING_MSI 2 + +No flags are specified so far, the corresponding field must be set to zero. + +struct kvm_irq_routing_irqchip { + __u32 irqchip; + __u32 pin; +}; + +struct kvm_irq_routing_msi { + __u32 address_lo; + __u32 address_hi; + __u32 data; + __u32 pad; +}; + +4.53 KVM_ASSIGN_SET_MSIX_NR + +Capability: KVM_CAP_DEVICE_MSIX +Architectures: x86 ia64 +Type: vm ioctl +Parameters: struct kvm_assigned_msix_nr (in) +Returns: 0 on success, -1 on error + +Set the number of MSI-X interrupts for an assigned device. This service can +only be called once in the lifetime of an assigned device. + +struct kvm_assigned_msix_nr { + __u32 assigned_dev_id; + __u16 entry_nr; + __u16 padding; +}; + +#define KVM_MAX_MSIX_PER_DEV 256 + +4.54 KVM_ASSIGN_SET_MSIX_ENTRY + +Capability: KVM_CAP_DEVICE_MSIX +Architectures: x86 ia64 +Type: vm ioctl +Parameters: struct kvm_assigned_msix_entry (in) +Returns: 0 on success, -1 on error + +Specifies the routing of an MSI-X assigned device interrupt to a GSI. Setting +the GSI vector to zero means disabling the interrupt. + +struct kvm_assigned_msix_entry { + __u32 assigned_dev_id; + __u32 gsi; + __u16 entry; /* The index of entry in the MSI-X table */ + __u16 padding[3]; +}; + +4.54 KVM_SET_TSC_KHZ + +Capability: KVM_CAP_TSC_CONTROL +Architectures: x86 +Type: vcpu ioctl +Parameters: virtual tsc_khz +Returns: 0 on success, -1 on error + +Specifies the tsc frequency for the virtual machine. The unit of the +frequency is KHz. + +4.55 KVM_GET_TSC_KHZ + +Capability: KVM_CAP_GET_TSC_KHZ +Architectures: x86 +Type: vcpu ioctl +Parameters: none +Returns: virtual tsc-khz on success, negative value on error + +Returns the tsc frequency of the guest. The unit of the return value is +KHz. If the host has unstable tsc this ioctl returns -EIO instead as an +error. + +5. The kvm_run structure + +Application code obtains a pointer to the kvm_run structure by +mmap()ing a vcpu fd. From that point, application code can control +execution by changing fields in kvm_run prior to calling the KVM_RUN +ioctl, and obtain information about the reason KVM_RUN returned by +looking up structure members. + +struct kvm_run { + /* in */ + __u8 request_interrupt_window; + +Request that KVM_RUN return when it becomes possible to inject external +interrupts into the guest. Useful in conjunction with KVM_INTERRUPT. + + __u8 padding1[7]; + + /* out */ + __u32 exit_reason; + +When KVM_RUN has returned successfully (return value 0), this informs +application code why KVM_RUN has returned. Allowable values for this +field are detailed below. + + __u8 ready_for_interrupt_injection; + +If request_interrupt_window has been specified, this field indicates +an interrupt can be injected now with KVM_INTERRUPT. + + __u8 if_flag; + +The value of the current interrupt flag. Only valid if in-kernel +local APIC is not used. + + __u8 padding2[2]; + + /* in (pre_kvm_run), out (post_kvm_run) */ + __u64 cr8; + +The value of the cr8 register. Only valid if in-kernel local APIC is +not used. Both input and output. + + __u64 apic_base; + +The value of the APIC BASE msr. Only valid if in-kernel local +APIC is not used. Both input and output. + + union { + /* KVM_EXIT_UNKNOWN */ + struct { + __u64 hardware_exit_reason; + } hw; + +If exit_reason is KVM_EXIT_UNKNOWN, the vcpu has exited due to unknown +reasons. Further architecture-specific information is available in +hardware_exit_reason. + + /* KVM_EXIT_FAIL_ENTRY */ + struct { + __u64 hardware_entry_failure_reason; + } fail_entry; + +If exit_reason is KVM_EXIT_FAIL_ENTRY, the vcpu could not be run due +to unknown reasons. Further architecture-specific information is +available in hardware_entry_failure_reason. + + /* KVM_EXIT_EXCEPTION */ + struct { + __u32 exception; + __u32 error_code; + } ex; + +Unused. + + /* KVM_EXIT_IO */ + struct { +#define KVM_EXIT_IO_IN 0 +#define KVM_EXIT_IO_OUT 1 + __u8 direction; + __u8 size; /* bytes */ + __u16 port; + __u32 count; + __u64 data_offset; /* relative to kvm_run start */ + } io; + +If exit_reason is KVM_EXIT_IO, then the vcpu has +executed a port I/O instruction which could not be satisfied by kvm. +data_offset describes where the data is located (KVM_EXIT_IO_OUT) or +where kvm expects application code to place the data for the next +KVM_RUN invocation (KVM_EXIT_IO_IN). Data format is a packed array. + + struct { + struct kvm_debug_exit_arch arch; + } debug; + +Unused. + + /* KVM_EXIT_MMIO */ + struct { + __u64 phys_addr; + __u8 data[8]; + __u32 len; + __u8 is_write; + } mmio; + +If exit_reason is KVM_EXIT_MMIO, then the vcpu has +executed a memory-mapped I/O instruction which could not be satisfied +by kvm. The 'data' member contains the written data if 'is_write' is +true, and should be filled by application code otherwise. + +NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO and KVM_EXIT_OSI, the corresponding +operations are complete (and guest state is consistent) only after userspace +has re-entered the kernel with KVM_RUN. The kernel side will first finish +incomplete operations and then check for pending signals. Userspace +can re-enter the guest with an unmasked signal pending to complete +pending operations. + + /* KVM_EXIT_HYPERCALL */ + struct { + __u64 nr; + __u64 args[6]; + __u64 ret; + __u32 longmode; + __u32 pad; + } hypercall; + +Unused. This was once used for 'hypercall to userspace'. To implement +such functionality, use KVM_EXIT_IO (x86) or KVM_EXIT_MMIO (all except s390). +Note KVM_EXIT_IO is significantly faster than KVM_EXIT_MMIO. + + /* KVM_EXIT_TPR_ACCESS */ + struct { + __u64 rip; + __u32 is_write; + __u32 pad; + } tpr_access; + +To be documented (KVM_TPR_ACCESS_REPORTING). + + /* KVM_EXIT_S390_SIEIC */ + struct { + __u8 icptcode; + __u64 mask; /* psw upper half */ + __u64 addr; /* psw lower half */ + __u16 ipa; + __u32 ipb; + } s390_sieic; + +s390 specific. + + /* KVM_EXIT_S390_RESET */ +#define KVM_S390_RESET_POR 1 +#define KVM_S390_RESET_CLEAR 2 +#define KVM_S390_RESET_SUBSYSTEM 4 +#define KVM_S390_RESET_CPU_INIT 8 +#define KVM_S390_RESET_IPL 16 + __u64 s390_reset_flags; + +s390 specific. + + /* KVM_EXIT_DCR */ + struct { + __u32 dcrn; + __u32 data; + __u8 is_write; + } dcr; + +powerpc specific. + + /* KVM_EXIT_OSI */ + struct { + __u64 gprs[32]; + } osi; + +MOL uses a special hypercall interface it calls 'OSI'. To enable it, we catch +hypercalls and exit with this exit struct that contains all the guest gprs. + +If exit_reason is KVM_EXIT_OSI, then the vcpu has triggered such a hypercall. +Userspace can now handle the hypercall and when it's done modify the gprs as +necessary. Upon guest entry all guest GPRs will then be replaced by the values +in this struct. + + /* Fix the size of the union. */ + char padding[256]; + }; +}; diff --git a/Documentation/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt similarity index 100% rename from Documentation/kvm/cpuid.txt rename to Documentation/virtual/kvm/cpuid.txt diff --git a/Documentation/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt similarity index 100% rename from Documentation/kvm/locking.txt rename to Documentation/virtual/kvm/locking.txt diff --git a/Documentation/kvm/mmu.txt b/Documentation/virtual/kvm/mmu.txt similarity index 100% rename from Documentation/kvm/mmu.txt rename to Documentation/virtual/kvm/mmu.txt diff --git a/Documentation/kvm/msr.txt b/Documentation/virtual/kvm/msr.txt similarity index 100% rename from Documentation/kvm/msr.txt rename to Documentation/virtual/kvm/msr.txt diff --git a/Documentation/kvm/ppc-pv.txt b/Documentation/virtual/kvm/ppc-pv.txt similarity index 100% rename from Documentation/kvm/ppc-pv.txt rename to Documentation/virtual/kvm/ppc-pv.txt diff --git a/Documentation/virtual/kvm/review-checklist.txt b/Documentation/virtual/kvm/review-checklist.txt new file mode 100644 index 000000000000..a850986ed684 --- /dev/null +++ b/Documentation/virtual/kvm/review-checklist.txt @@ -0,0 +1,38 @@ +Review checklist for kvm patches +================================ + +1. The patch must follow Documentation/CodingStyle and + Documentation/SubmittingPatches. + +2. Patches should be against kvm.git master branch. + +3. If the patch introduces or modifies a new userspace API: + - the API must be documented in Documentation/virtual/kvm/api.txt + - the API must be discoverable using KVM_CHECK_EXTENSION + +4. New state must include support for save/restore. + +5. New features must default to off (userspace should explicitly request them). + Performance improvements can and should default to on. + +6. New cpu features should be exposed via KVM_GET_SUPPORTED_CPUID2 + +7. Emulator changes should be accompanied by unit tests for qemu-kvm.git + kvm/test directory. + +8. Changes should be vendor neutral when possible. Changes to common code + are better than duplicating changes to vendor code. + +9. Similarly, prefer changes to arch independent code than to arch dependent + code. + +10. User/kernel interfaces and guest/host interfaces must be 64-bit clean + (all variables and sizes naturally aligned on 64-bit; use specific types + only - u64 rather than ulong). + +11. New guest visible features must either be documented in a hardware manual + or be accompanied by documentation. + +12. Features must be robust against reset and kexec - for example, shared + host/guest memory must be unshared to prevent the host from writing to + guest memory that the guest has not reserved for this purpose. diff --git a/Documentation/kvm/timekeeping.txt b/Documentation/virtual/kvm/timekeeping.txt similarity index 100% rename from Documentation/kvm/timekeeping.txt rename to Documentation/virtual/kvm/timekeeping.txt diff --git a/Documentation/lguest/.gitignore b/Documentation/virtual/lguest/.gitignore similarity index 100% rename from Documentation/lguest/.gitignore rename to Documentation/virtual/lguest/.gitignore diff --git a/Documentation/virtual/lguest/Makefile b/Documentation/virtual/lguest/Makefile new file mode 100644 index 000000000000..0ac34206f7a7 --- /dev/null +++ b/Documentation/virtual/lguest/Makefile @@ -0,0 +1,8 @@ +# This creates the demonstration utility "lguest" which runs a Linux guest. +# Missing headers? Add "-I../../../include -I../../../arch/x86/include" +CFLAGS:=-m32 -Wall -Wmissing-declarations -Wmissing-prototypes -O3 -U_FORTIFY_SOURCE + +all: lguest + +clean: + rm -f lguest diff --git a/Documentation/lguest/extract b/Documentation/virtual/lguest/extract similarity index 100% rename from Documentation/lguest/extract rename to Documentation/virtual/lguest/extract diff --git a/Documentation/virtual/lguest/lguest.c b/Documentation/virtual/lguest/lguest.c new file mode 100644 index 000000000000..cd9d6af61d07 --- /dev/null +++ b/Documentation/virtual/lguest/lguest.c @@ -0,0 +1,2077 @@ +/*P:100 + * This is the Launcher code, a simple program which lays out the "physical" + * memory for the new Guest by mapping the kernel image and the virtual + * devices, then opens /dev/lguest to tell the kernel about the Guest and + * control it. +:*/ +#define _LARGEFILE64_SOURCE +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include "../../../include/linux/lguest_launcher.h" +/*L:110 + * We can ignore the 42 include files we need for this program, but I do want + * to draw attention to the use of kernel-style types. + * + * As Linus said, "C is a Spartan language, and so should your naming be." I + * like these abbreviations, so we define them here. Note that u64 is always + * unsigned long long, which works on all Linux systems: this means that we can + * use %llu in printf for any u64. + */ +typedef unsigned long long u64; +typedef uint32_t u32; +typedef uint16_t u16; +typedef uint8_t u8; +/*:*/ + +#define PAGE_PRESENT 0x7 /* Present, RW, Execute */ +#define BRIDGE_PFX "bridge:" +#ifndef SIOCBRADDIF +#define SIOCBRADDIF 0x89a2 /* add interface to bridge */ +#endif +/* We can have up to 256 pages for devices. */ +#define DEVICE_PAGES 256 +/* This will occupy 3 pages: it must be a power of 2. */ +#define VIRTQUEUE_NUM 256 + +/*L:120 + * verbose is both a global flag and a macro. The C preprocessor allows + * this, and although I wouldn't recommend it, it works quite nicely here. + */ +static bool verbose; +#define verbose(args...) \ + do { if (verbose) printf(args); } while(0) +/*:*/ + +/* The pointer to the start of guest memory. */ +static void *guest_base; +/* The maximum guest physical address allowed, and maximum possible. */ +static unsigned long guest_limit, guest_max; +/* The /dev/lguest file descriptor. */ +static int lguest_fd; + +/* a per-cpu variable indicating whose vcpu is currently running */ +static unsigned int __thread cpu_id; + +/* This is our list of devices. */ +struct device_list { + /* Counter to assign interrupt numbers. */ + unsigned int next_irq; + + /* Counter to print out convenient device numbers. */ + unsigned int device_num; + + /* The descriptor page for the devices. */ + u8 *descpage; + + /* A single linked list of devices. */ + struct device *dev; + /* And a pointer to the last device for easy append. */ + struct device *lastdev; +}; + +/* The list of Guest devices, based on command line arguments. */ +static struct device_list devices; + +/* The device structure describes a single device. */ +struct device { + /* The linked-list pointer. */ + struct device *next; + + /* The device's descriptor, as mapped into the Guest. */ + struct lguest_device_desc *desc; + + /* We can't trust desc values once Guest has booted: we use these. */ + unsigned int feature_len; + unsigned int num_vq; + + /* The name of this device, for --verbose. */ + const char *name; + + /* Any queues attached to this device */ + struct virtqueue *vq; + + /* Is it operational */ + bool running; + + /* Device-specific data. */ + void *priv; +}; + +/* The virtqueue structure describes a queue attached to a device. */ +struct virtqueue { + struct virtqueue *next; + + /* Which device owns me. */ + struct device *dev; + + /* The configuration for this queue. */ + struct lguest_vqconfig config; + + /* The actual ring of buffers. */ + struct vring vring; + + /* Last available index we saw. */ + u16 last_avail_idx; + + /* How many are used since we sent last irq? */ + unsigned int pending_used; + + /* Eventfd where Guest notifications arrive. */ + int eventfd; + + /* Function for the thread which is servicing this virtqueue. */ + void (*service)(struct virtqueue *vq); + pid_t thread; +}; + +/* Remember the arguments to the program so we can "reboot" */ +static char **main_args; + +/* The original tty settings to restore on exit. */ +static struct termios orig_term; + +/* + * We have to be careful with barriers: our devices are all run in separate + * threads and so we need to make sure that changes visible to the Guest happen + * in precise order. + */ +#define wmb() __asm__ __volatile__("" : : : "memory") +#define mb() __asm__ __volatile__("" : : : "memory") + +/* + * Convert an iovec element to the given type. + * + * This is a fairly ugly trick: we need to know the size of the type and + * alignment requirement to check the pointer is kosher. It's also nice to + * have the name of the type in case we report failure. + * + * Typing those three things all the time is cumbersome and error prone, so we + * have a macro which sets them all up and passes to the real function. + */ +#define convert(iov, type) \ + ((type *)_convert((iov), sizeof(type), __alignof__(type), #type)) + +static void *_convert(struct iovec *iov, size_t size, size_t align, + const char *name) +{ + if (iov->iov_len != size) + errx(1, "Bad iovec size %zu for %s", iov->iov_len, name); + if ((unsigned long)iov->iov_base % align != 0) + errx(1, "Bad alignment %p for %s", iov->iov_base, name); + return iov->iov_base; +} + +/* Wrapper for the last available index. Makes it easier to change. */ +#define lg_last_avail(vq) ((vq)->last_avail_idx) + +/* + * The virtio configuration space is defined to be little-endian. x86 is + * little-endian too, but it's nice to be explicit so we have these helpers. + */ +#define cpu_to_le16(v16) (v16) +#define cpu_to_le32(v32) (v32) +#define cpu_to_le64(v64) (v64) +#define le16_to_cpu(v16) (v16) +#define le32_to_cpu(v32) (v32) +#define le64_to_cpu(v64) (v64) + +/* Is this iovec empty? */ +static bool iov_empty(const struct iovec iov[], unsigned int num_iov) +{ + unsigned int i; + + for (i = 0; i < num_iov; i++) + if (iov[i].iov_len) + return false; + return true; +} + +/* Take len bytes from the front of this iovec. */ +static void iov_consume(struct iovec iov[], unsigned num_iov, unsigned len) +{ + unsigned int i; + + for (i = 0; i < num_iov; i++) { + unsigned int used; + + used = iov[i].iov_len < len ? iov[i].iov_len : len; + iov[i].iov_base += used; + iov[i].iov_len -= used; + len -= used; + } + assert(len == 0); +} + +/* The device virtqueue descriptors are followed by feature bitmasks. */ +static u8 *get_feature_bits(struct device *dev) +{ + return (u8 *)(dev->desc + 1) + + dev->num_vq * sizeof(struct lguest_vqconfig); +} + +/*L:100 + * The Launcher code itself takes us out into userspace, that scary place where + * pointers run wild and free! Unfortunately, like most userspace programs, + * it's quite boring (which is why everyone likes to hack on the kernel!). + * Perhaps if you make up an Lguest Drinking Game at this point, it will get + * you through this section. Or, maybe not. + * + * The Launcher sets up a big chunk of memory to be the Guest's "physical" + * memory and stores it in "guest_base". In other words, Guest physical == + * Launcher virtual with an offset. + * + * This can be tough to get your head around, but usually it just means that we + * use these trivial conversion functions when the Guest gives us its + * "physical" addresses: + */ +static void *from_guest_phys(unsigned long addr) +{ + return guest_base + addr; +} + +static unsigned long to_guest_phys(const void *addr) +{ + return (addr - guest_base); +} + +/*L:130 + * Loading the Kernel. + * + * We start with couple of simple helper routines. open_or_die() avoids + * error-checking code cluttering the callers: + */ +static int open_or_die(const char *name, int flags) +{ + int fd = open(name, flags); + if (fd < 0) + err(1, "Failed to open %s", name); + return fd; +} + +/* map_zeroed_pages() takes a number of pages. */ +static void *map_zeroed_pages(unsigned int num) +{ + int fd = open_or_die("/dev/zero", O_RDONLY); + void *addr; + + /* + * We use a private mapping (ie. if we write to the page, it will be + * copied). We allocate an extra two pages PROT_NONE to act as guard + * pages against read/write attempts that exceed allocated space. + */ + addr = mmap(NULL, getpagesize() * (num+2), + PROT_NONE, MAP_PRIVATE, fd, 0); + + if (addr == MAP_FAILED) + err(1, "Mmapping %u pages of /dev/zero", num); + + if (mprotect(addr + getpagesize(), getpagesize() * num, + PROT_READ|PROT_WRITE) == -1) + err(1, "mprotect rw %u pages failed", num); + + /* + * One neat mmap feature is that you can close the fd, and it + * stays mapped. + */ + close(fd); + + /* Return address after PROT_NONE page */ + return addr + getpagesize(); +} + +/* Get some more pages for a device. */ +static void *get_pages(unsigned int num) +{ + void *addr = from_guest_phys(guest_limit); + + guest_limit += num * getpagesize(); + if (guest_limit > guest_max) + errx(1, "Not enough memory for devices"); + return addr; +} + +/* + * This routine is used to load the kernel or initrd. It tries mmap, but if + * that fails (Plan 9's kernel file isn't nicely aligned on page boundaries), + * it falls back to reading the memory in. + */ +static void map_at(int fd, void *addr, unsigned long offset, unsigned long len) +{ + ssize_t r; + + /* + * We map writable even though for some segments are marked read-only. + * The kernel really wants to be writable: it patches its own + * instructions. + * + * MAP_PRIVATE means that the page won't be copied until a write is + * done to it. This allows us to share untouched memory between + * Guests. + */ + if (mmap(addr, len, PROT_READ|PROT_WRITE, + MAP_FIXED|MAP_PRIVATE, fd, offset) != MAP_FAILED) + return; + + /* pread does a seek and a read in one shot: saves a few lines. */ + r = pread(fd, addr, len, offset); + if (r != len) + err(1, "Reading offset %lu len %lu gave %zi", offset, len, r); +} + +/* + * This routine takes an open vmlinux image, which is in ELF, and maps it into + * the Guest memory. ELF = Embedded Linking Format, which is the format used + * by all modern binaries on Linux including the kernel. + * + * The ELF headers give *two* addresses: a physical address, and a virtual + * address. We use the physical address; the Guest will map itself to the + * virtual address. + * + * We return the starting address. + */ +static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr) +{ + Elf32_Phdr phdr[ehdr->e_phnum]; + unsigned int i; + + /* + * Sanity checks on the main ELF header: an x86 executable with a + * reasonable number of correctly-sized program headers. + */ + if (ehdr->e_type != ET_EXEC + || ehdr->e_machine != EM_386 + || ehdr->e_phentsize != sizeof(Elf32_Phdr) + || ehdr->e_phnum < 1 || ehdr->e_phnum > 65536U/sizeof(Elf32_Phdr)) + errx(1, "Malformed elf header"); + + /* + * An ELF executable contains an ELF header and a number of "program" + * headers which indicate which parts ("segments") of the program to + * load where. + */ + + /* We read in all the program headers at once: */ + if (lseek(elf_fd, ehdr->e_phoff, SEEK_SET) < 0) + err(1, "Seeking to program headers"); + if (read(elf_fd, phdr, sizeof(phdr)) != sizeof(phdr)) + err(1, "Reading program headers"); + + /* + * Try all the headers: there are usually only three. A read-only one, + * a read-write one, and a "note" section which we don't load. + */ + for (i = 0; i < ehdr->e_phnum; i++) { + /* If this isn't a loadable segment, we ignore it */ + if (phdr[i].p_type != PT_LOAD) + continue; + + verbose("Section %i: size %i addr %p\n", + i, phdr[i].p_memsz, (void *)phdr[i].p_paddr); + + /* We map this section of the file at its physical address. */ + map_at(elf_fd, from_guest_phys(phdr[i].p_paddr), + phdr[i].p_offset, phdr[i].p_filesz); + } + + /* The entry point is given in the ELF header. */ + return ehdr->e_entry; +} + +/*L:150 + * A bzImage, unlike an ELF file, is not meant to be loaded. You're supposed + * to jump into it and it will unpack itself. We used to have to perform some + * hairy magic because the unpacking code scared me. + * + * Fortunately, Jeremy Fitzhardinge convinced me it wasn't that hard and wrote + * a small patch to jump over the tricky bits in the Guest, so now we just read + * the funky header so we know where in the file to load, and away we go! + */ +static unsigned long load_bzimage(int fd) +{ + struct boot_params boot; + int r; + /* Modern bzImages get loaded at 1M. */ + void *p = from_guest_phys(0x100000); + + /* + * Go back to the start of the file and read the header. It should be + * a Linux boot header (see Documentation/x86/i386/boot.txt) + */ + lseek(fd, 0, SEEK_SET); + read(fd, &boot, sizeof(boot)); + + /* Inside the setup_hdr, we expect the magic "HdrS" */ + if (memcmp(&boot.hdr.header, "HdrS", 4) != 0) + errx(1, "This doesn't look like a bzImage to me"); + + /* Skip over the extra sectors of the header. */ + lseek(fd, (boot.hdr.setup_sects+1) * 512, SEEK_SET); + + /* Now read everything into memory. in nice big chunks. */ + while ((r = read(fd, p, 65536)) > 0) + p += r; + + /* Finally, code32_start tells us where to enter the kernel. */ + return boot.hdr.code32_start; +} + +/*L:140 + * Loading the kernel is easy when it's a "vmlinux", but most kernels + * come wrapped up in the self-decompressing "bzImage" format. With a little + * work, we can load those, too. + */ +static unsigned long load_kernel(int fd) +{ + Elf32_Ehdr hdr; + + /* Read in the first few bytes. */ + if (read(fd, &hdr, sizeof(hdr)) != sizeof(hdr)) + err(1, "Reading kernel"); + + /* If it's an ELF file, it starts with "\177ELF" */ + if (memcmp(hdr.e_ident, ELFMAG, SELFMAG) == 0) + return map_elf(fd, &hdr); + + /* Otherwise we assume it's a bzImage, and try to load it. */ + return load_bzimage(fd); +} + +/* + * This is a trivial little helper to align pages. Andi Kleen hated it because + * it calls getpagesize() twice: "it's dumb code." + * + * Kernel guys get really het up about optimization, even when it's not + * necessary. I leave this code as a reaction against that. + */ +static inline unsigned long page_align(unsigned long addr) +{ + /* Add upwards and truncate downwards. */ + return ((addr + getpagesize()-1) & ~(getpagesize()-1)); +} + +/*L:180 + * An "initial ram disk" is a disk image loaded into memory along with the + * kernel which the kernel can use to boot from without needing any drivers. + * Most distributions now use this as standard: the initrd contains the code to + * load the appropriate driver modules for the current machine. + * + * Importantly, James Morris works for RedHat, and Fedora uses initrds for its + * kernels. He sent me this (and tells me when I break it). + */ +static unsigned long load_initrd(const char *name, unsigned long mem) +{ + int ifd; + struct stat st; + unsigned long len; + + ifd = open_or_die(name, O_RDONLY); + /* fstat() is needed to get the file size. */ + if (fstat(ifd, &st) < 0) + err(1, "fstat() on initrd '%s'", name); + + /* + * We map the initrd at the top of memory, but mmap wants it to be + * page-aligned, so we round the size up for that. + */ + len = page_align(st.st_size); + map_at(ifd, from_guest_phys(mem - len), 0, st.st_size); + /* + * Once a file is mapped, you can close the file descriptor. It's a + * little odd, but quite useful. + */ + close(ifd); + verbose("mapped initrd %s size=%lu @ %p\n", name, len, (void*)mem-len); + + /* We return the initrd size. */ + return len; +} +/*:*/ + +/* + * Simple routine to roll all the commandline arguments together with spaces + * between them. + */ +static void concat(char *dst, char *args[]) +{ + unsigned int i, len = 0; + + for (i = 0; args[i]; i++) { + if (i) { + strcat(dst+len, " "); + len++; + } + strcpy(dst+len, args[i]); + len += strlen(args[i]); + } + /* In case it's empty. */ + dst[len] = '\0'; +} + +/*L:185 + * This is where we actually tell the kernel to initialize the Guest. We + * saw the arguments it expects when we looked at initialize() in lguest_user.c: + * the base of Guest "physical" memory, the top physical page to allow and the + * entry point for the Guest. + */ +static void tell_kernel(unsigned long start) +{ + unsigned long args[] = { LHREQ_INITIALIZE, + (unsigned long)guest_base, + guest_limit / getpagesize(), start }; + verbose("Guest: %p - %p (%#lx)\n", + guest_base, guest_base + guest_limit, guest_limit); + lguest_fd = open_or_die("/dev/lguest", O_RDWR); + if (write(lguest_fd, args, sizeof(args)) < 0) + err(1, "Writing to /dev/lguest"); +} +/*:*/ + +/*L:200 + * Device Handling. + * + * When the Guest gives us a buffer, it sends an array of addresses and sizes. + * We need to make sure it's not trying to reach into the Launcher itself, so + * we have a convenient routine which checks it and exits with an error message + * if something funny is going on: + */ +static void *_check_pointer(unsigned long addr, unsigned int size, + unsigned int line) +{ + /* + * Check if the requested address and size exceeds the allocated memory, + * or addr + size wraps around. + */ + if ((addr + size) > guest_limit || (addr + size) < addr) + errx(1, "%s:%i: Invalid address %#lx", __FILE__, line, addr); + /* + * We return a pointer for the caller's convenience, now we know it's + * safe to use. + */ + return from_guest_phys(addr); +} +/* A macro which transparently hands the line number to the real function. */ +#define check_pointer(addr,size) _check_pointer(addr, size, __LINE__) + +/* + * Each buffer in the virtqueues is actually a chain of descriptors. This + * function returns the next descriptor in the chain, or vq->vring.num if we're + * at the end. + */ +static unsigned next_desc(struct vring_desc *desc, + unsigned int i, unsigned int max) +{ + unsigned int next; + + /* If this descriptor says it doesn't chain, we're done. */ + if (!(desc[i].flags & VRING_DESC_F_NEXT)) + return max; + + /* Check they're not leading us off end of descriptors. */ + next = desc[i].next; + /* Make sure compiler knows to grab that: we don't want it changing! */ + wmb(); + + if (next >= max) + errx(1, "Desc next is %u", next); + + return next; +} + +/* + * This actually sends the interrupt for this virtqueue, if we've used a + * buffer. + */ +static void trigger_irq(struct virtqueue *vq) +{ + unsigned long buf[] = { LHREQ_IRQ, vq->config.irq }; + + /* Don't inform them if nothing used. */ + if (!vq->pending_used) + return; + vq->pending_used = 0; + + /* If they don't want an interrupt, don't send one... */ + if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) { + return; + } + + /* Send the Guest an interrupt tell them we used something up. */ + if (write(lguest_fd, buf, sizeof(buf)) != 0) + err(1, "Triggering irq %i", vq->config.irq); +} + +/* + * This looks in the virtqueue for the first available buffer, and converts + * it to an iovec for convenient access. Since descriptors consist of some + * number of output then some number of input descriptors, it's actually two + * iovecs, but we pack them into one and note how many of each there were. + * + * This function waits if necessary, and returns the descriptor number found. + */ +static unsigned wait_for_vq_desc(struct virtqueue *vq, + struct iovec iov[], + unsigned int *out_num, unsigned int *in_num) +{ + unsigned int i, head, max; + struct vring_desc *desc; + u16 last_avail = lg_last_avail(vq); + + /* There's nothing available? */ + while (last_avail == vq->vring.avail->idx) { + u64 event; + + /* + * Since we're about to sleep, now is a good time to tell the + * Guest about what we've used up to now. + */ + trigger_irq(vq); + + /* OK, now we need to know about added descriptors. */ + vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY; + + /* + * They could have slipped one in as we were doing that: make + * sure it's written, then check again. + */ + mb(); + if (last_avail != vq->vring.avail->idx) { + vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY; + break; + } + + /* Nothing new? Wait for eventfd to tell us they refilled. */ + if (read(vq->eventfd, &event, sizeof(event)) != sizeof(event)) + errx(1, "Event read failed?"); + + /* We don't need to be notified again. */ + vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY; + } + + /* Check it isn't doing very strange things with descriptor numbers. */ + if ((u16)(vq->vring.avail->idx - last_avail) > vq->vring.num) + errx(1, "Guest moved used index from %u to %u", + last_avail, vq->vring.avail->idx); + + /* + * Grab the next descriptor number they're advertising, and increment + * the index we've seen. + */ + head = vq->vring.avail->ring[last_avail % vq->vring.num]; + lg_last_avail(vq)++; + + /* If their number is silly, that's a fatal mistake. */ + if (head >= vq->vring.num) + errx(1, "Guest says index %u is available", head); + + /* When we start there are none of either input nor output. */ + *out_num = *in_num = 0; + + max = vq->vring.num; + desc = vq->vring.desc; + i = head; + + /* + * If this is an indirect entry, then this buffer contains a descriptor + * table which we handle as if it's any normal descriptor chain. + */ + if (desc[i].flags & VRING_DESC_F_INDIRECT) { + if (desc[i].len % sizeof(struct vring_desc)) + errx(1, "Invalid size for indirect buffer table"); + + max = desc[i].len / sizeof(struct vring_desc); + desc = check_pointer(desc[i].addr, desc[i].len); + i = 0; + } + + do { + /* Grab the first descriptor, and check it's OK. */ + iov[*out_num + *in_num].iov_len = desc[i].len; + iov[*out_num + *in_num].iov_base + = check_pointer(desc[i].addr, desc[i].len); + /* If this is an input descriptor, increment that count. */ + if (desc[i].flags & VRING_DESC_F_WRITE) + (*in_num)++; + else { + /* + * If it's an output descriptor, they're all supposed + * to come before any input descriptors. + */ + if (*in_num) + errx(1, "Descriptor has out after in"); + (*out_num)++; + } + + /* If we've got too many, that implies a descriptor loop. */ + if (*out_num + *in_num > max) + errx(1, "Looped descriptor"); + } while ((i = next_desc(desc, i, max)) != max); + + return head; +} + +/* + * After we've used one of their buffers, we tell the Guest about it. Sometime + * later we'll want to send them an interrupt using trigger_irq(); note that + * wait_for_vq_desc() does that for us if it has to wait. + */ +static void add_used(struct virtqueue *vq, unsigned int head, int len) +{ + struct vring_used_elem *used; + + /* + * The virtqueue contains a ring of used buffers. Get a pointer to the + * next entry in that used ring. + */ + used = &vq->vring.used->ring[vq->vring.used->idx % vq->vring.num]; + used->id = head; + used->len = len; + /* Make sure buffer is written before we update index. */ + wmb(); + vq->vring.used->idx++; + vq->pending_used++; +} + +/* And here's the combo meal deal. Supersize me! */ +static void add_used_and_trigger(struct virtqueue *vq, unsigned head, int len) +{ + add_used(vq, head, len); + trigger_irq(vq); +} + +/* + * The Console + * + * We associate some data with the console for our exit hack. + */ +struct console_abort { + /* How many times have they hit ^C? */ + int count; + /* When did they start? */ + struct timeval start; +}; + +/* This is the routine which handles console input (ie. stdin). */ +static void console_input(struct virtqueue *vq) +{ + int len; + unsigned int head, in_num, out_num; + struct console_abort *abort = vq->dev->priv; + struct iovec iov[vq->vring.num]; + + /* Make sure there's a descriptor available. */ + head = wait_for_vq_desc(vq, iov, &out_num, &in_num); + if (out_num) + errx(1, "Output buffers in console in queue?"); + + /* Read into it. This is where we usually wait. */ + len = readv(STDIN_FILENO, iov, in_num); + if (len <= 0) { + /* Ran out of input? */ + warnx("Failed to get console input, ignoring console."); + /* + * For simplicity, dying threads kill the whole Launcher. So + * just nap here. + */ + for (;;) + pause(); + } + + /* Tell the Guest we used a buffer. */ + add_used_and_trigger(vq, head, len); + + /* + * Three ^C within one second? Exit. + * + * This is such a hack, but works surprisingly well. Each ^C has to + * be in a buffer by itself, so they can't be too fast. But we check + * that we get three within about a second, so they can't be too + * slow. + */ + if (len != 1 || ((char *)iov[0].iov_base)[0] != 3) { + abort->count = 0; + return; + } + + abort->count++; + if (abort->count == 1) + gettimeofday(&abort->start, NULL); + else if (abort->count == 3) { + struct timeval now; + gettimeofday(&now, NULL); + /* Kill all Launcher processes with SIGINT, like normal ^C */ + if (now.tv_sec <= abort->start.tv_sec+1) + kill(0, SIGINT); + abort->count = 0; + } +} + +/* This is the routine which handles console output (ie. stdout). */ +static void console_output(struct virtqueue *vq) +{ + unsigned int head, out, in; + struct iovec iov[vq->vring.num]; + + /* We usually wait in here, for the Guest to give us something. */ + head = wait_for_vq_desc(vq, iov, &out, &in); + if (in) + errx(1, "Input buffers in console output queue?"); + + /* writev can return a partial write, so we loop here. */ + while (!iov_empty(iov, out)) { + int len = writev(STDOUT_FILENO, iov, out); + if (len <= 0) + err(1, "Write to stdout gave %i", len); + iov_consume(iov, out, len); + } + + /* + * We're finished with that buffer: if we're going to sleep, + * wait_for_vq_desc() will prod the Guest with an interrupt. + */ + add_used(vq, head, 0); +} + +/* + * The Network + * + * Handling output for network is also simple: we get all the output buffers + * and write them to /dev/net/tun. + */ +struct net_info { + int tunfd; +}; + +static void net_output(struct virtqueue *vq) +{ + struct net_info *net_info = vq->dev->priv; + unsigned int head, out, in; + struct iovec iov[vq->vring.num]; + + /* We usually wait in here for the Guest to give us a packet. */ + head = wait_for_vq_desc(vq, iov, &out, &in); + if (in) + errx(1, "Input buffers in net output queue?"); + /* + * Send the whole thing through to /dev/net/tun. It expects the exact + * same format: what a coincidence! + */ + if (writev(net_info->tunfd, iov, out) < 0) + errx(1, "Write to tun failed?"); + + /* + * Done with that one; wait_for_vq_desc() will send the interrupt if + * all packets are processed. + */ + add_used(vq, head, 0); +} + +/* + * Handling network input is a bit trickier, because I've tried to optimize it. + * + * First we have a helper routine which tells is if from this file descriptor + * (ie. the /dev/net/tun device) will block: + */ +static bool will_block(int fd) +{ + fd_set fdset; + struct timeval zero = { 0, 0 }; + FD_ZERO(&fdset); + FD_SET(fd, &fdset); + return select(fd+1, &fdset, NULL, NULL, &zero) != 1; +} + +/* + * This handles packets coming in from the tun device to our Guest. Like all + * service routines, it gets called again as soon as it returns, so you don't + * see a while(1) loop here. + */ +static void net_input(struct virtqueue *vq) +{ + int len; + unsigned int head, out, in; + struct iovec iov[vq->vring.num]; + struct net_info *net_info = vq->dev->priv; + + /* + * Get a descriptor to write an incoming packet into. This will also + * send an interrupt if they're out of descriptors. + */ + head = wait_for_vq_desc(vq, iov, &out, &in); + if (out) + errx(1, "Output buffers in net input queue?"); + + /* + * If it looks like we'll block reading from the tun device, send them + * an interrupt. + */ + if (vq->pending_used && will_block(net_info->tunfd)) + trigger_irq(vq); + + /* + * Read in the packet. This is where we normally wait (when there's no + * incoming network traffic). + */ + len = readv(net_info->tunfd, iov, in); + if (len <= 0) + err(1, "Failed to read from tun."); + + /* + * Mark that packet buffer as used, but don't interrupt here. We want + * to wait until we've done as much work as we can. + */ + add_used(vq, head, len); +} +/*:*/ + +/* This is the helper to create threads: run the service routine in a loop. */ +static int do_thread(void *_vq) +{ + struct virtqueue *vq = _vq; + + for (;;) + vq->service(vq); + return 0; +} + +/* + * When a child dies, we kill our entire process group with SIGTERM. This + * also has the side effect that the shell restores the console for us! + */ +static void kill_launcher(int signal) +{ + kill(0, SIGTERM); +} + +static void reset_device(struct device *dev) +{ + struct virtqueue *vq; + + verbose("Resetting device %s\n", dev->name); + + /* Clear any features they've acked. */ + memset(get_feature_bits(dev) + dev->feature_len, 0, dev->feature_len); + + /* We're going to be explicitly killing threads, so ignore them. */ + signal(SIGCHLD, SIG_IGN); + + /* Zero out the virtqueues, get rid of their threads */ + for (vq = dev->vq; vq; vq = vq->next) { + if (vq->thread != (pid_t)-1) { + kill(vq->thread, SIGTERM); + waitpid(vq->thread, NULL, 0); + vq->thread = (pid_t)-1; + } + memset(vq->vring.desc, 0, + vring_size(vq->config.num, LGUEST_VRING_ALIGN)); + lg_last_avail(vq) = 0; + } + dev->running = false; + + /* Now we care if threads die. */ + signal(SIGCHLD, (void *)kill_launcher); +} + +/*L:216 + * This actually creates the thread which services the virtqueue for a device. + */ +static void create_thread(struct virtqueue *vq) +{ + /* + * Create stack for thread. Since the stack grows upwards, we point + * the stack pointer to the end of this region. + */ + char *stack = malloc(32768); + unsigned long args[] = { LHREQ_EVENTFD, + vq->config.pfn*getpagesize(), 0 }; + + /* Create a zero-initialized eventfd. */ + vq->eventfd = eventfd(0, 0); + if (vq->eventfd < 0) + err(1, "Creating eventfd"); + args[2] = vq->eventfd; + + /* + * Attach an eventfd to this virtqueue: it will go off when the Guest + * does an LHCALL_NOTIFY for this vq. + */ + if (write(lguest_fd, &args, sizeof(args)) != 0) + err(1, "Attaching eventfd"); + + /* + * CLONE_VM: because it has to access the Guest memory, and SIGCHLD so + * we get a signal if it dies. + */ + vq->thread = clone(do_thread, stack + 32768, CLONE_VM | SIGCHLD, vq); + if (vq->thread == (pid_t)-1) + err(1, "Creating clone"); + + /* We close our local copy now the child has it. */ + close(vq->eventfd); +} + +static void start_device(struct device *dev) +{ + unsigned int i; + struct virtqueue *vq; + + verbose("Device %s OK: offered", dev->name); + for (i = 0; i < dev->feature_len; i++) + verbose(" %02x", get_feature_bits(dev)[i]); + verbose(", accepted"); + for (i = 0; i < dev->feature_len; i++) + verbose(" %02x", get_feature_bits(dev) + [dev->feature_len+i]); + + for (vq = dev->vq; vq; vq = vq->next) { + if (vq->service) + create_thread(vq); + } + dev->running = true; +} + +static void cleanup_devices(void) +{ + struct device *dev; + + for (dev = devices.dev; dev; dev = dev->next) + reset_device(dev); + + /* If we saved off the original terminal settings, restore them now. */ + if (orig_term.c_lflag & (ISIG|ICANON|ECHO)) + tcsetattr(STDIN_FILENO, TCSANOW, &orig_term); +} + +/* When the Guest tells us they updated the status field, we handle it. */ +static void update_device_status(struct device *dev) +{ + /* A zero status is a reset, otherwise it's a set of flags. */ + if (dev->desc->status == 0) + reset_device(dev); + else if (dev->desc->status & VIRTIO_CONFIG_S_FAILED) { + warnx("Device %s configuration FAILED", dev->name); + if (dev->running) + reset_device(dev); + } else if (dev->desc->status & VIRTIO_CONFIG_S_DRIVER_OK) { + if (!dev->running) + start_device(dev); + } +} + +/*L:215 + * This is the generic routine we call when the Guest uses LHCALL_NOTIFY. In + * particular, it's used to notify us of device status changes during boot. + */ +static void handle_output(unsigned long addr) +{ + struct device *i; + + /* Check each device. */ + for (i = devices.dev; i; i = i->next) { + struct virtqueue *vq; + + /* + * Notifications to device descriptors mean they updated the + * device status. + */ + if (from_guest_phys(addr) == i->desc) { + update_device_status(i); + return; + } + + /* + * Devices *can* be used before status is set to DRIVER_OK. + * The original plan was that they would never do this: they + * would always finish setting up their status bits before + * actually touching the virtqueues. In practice, we allowed + * them to, and they do (eg. the disk probes for partition + * tables as part of initialization). + * + * If we see this, we start the device: once it's running, we + * expect the device to catch all the notifications. + */ + for (vq = i->vq; vq; vq = vq->next) { + if (addr != vq->config.pfn*getpagesize()) + continue; + if (i->running) + errx(1, "Notification on running %s", i->name); + /* This just calls create_thread() for each virtqueue */ + start_device(i); + return; + } + } + + /* + * Early console write is done using notify on a nul-terminated string + * in Guest memory. It's also great for hacking debugging messages + * into a Guest. + */ + if (addr >= guest_limit) + errx(1, "Bad NOTIFY %#lx", addr); + + write(STDOUT_FILENO, from_guest_phys(addr), + strnlen(from_guest_phys(addr), guest_limit - addr)); +} + +/*L:190 + * Device Setup + * + * All devices need a descriptor so the Guest knows it exists, and a "struct + * device" so the Launcher can keep track of it. We have common helper + * routines to allocate and manage them. + */ + +/* + * The layout of the device page is a "struct lguest_device_desc" followed by a + * number of virtqueue descriptors, then two sets of feature bits, then an + * array of configuration bytes. This routine returns the configuration + * pointer. + */ +static u8 *device_config(const struct device *dev) +{ + return (void *)(dev->desc + 1) + + dev->num_vq * sizeof(struct lguest_vqconfig) + + dev->feature_len * 2; +} + +/* + * This routine allocates a new "struct lguest_device_desc" from descriptor + * table page just above the Guest's normal memory. It returns a pointer to + * that descriptor. + */ +static struct lguest_device_desc *new_dev_desc(u16 type) +{ + struct lguest_device_desc d = { .type = type }; + void *p; + + /* Figure out where the next device config is, based on the last one. */ + if (devices.lastdev) + p = device_config(devices.lastdev) + + devices.lastdev->desc->config_len; + else + p = devices.descpage; + + /* We only have one page for all the descriptors. */ + if (p + sizeof(d) > (void *)devices.descpage + getpagesize()) + errx(1, "Too many devices"); + + /* p might not be aligned, so we memcpy in. */ + return memcpy(p, &d, sizeof(d)); +} + +/* + * Each device descriptor is followed by the description of its virtqueues. We + * specify how many descriptors the virtqueue is to have. + */ +static void add_virtqueue(struct device *dev, unsigned int num_descs, + void (*service)(struct virtqueue *)) +{ + unsigned int pages; + struct virtqueue **i, *vq = malloc(sizeof(*vq)); + void *p; + + /* First we need some memory for this virtqueue. */ + pages = (vring_size(num_descs, LGUEST_VRING_ALIGN) + getpagesize() - 1) + / getpagesize(); + p = get_pages(pages); + + /* Initialize the virtqueue */ + vq->next = NULL; + vq->last_avail_idx = 0; + vq->dev = dev; + + /* + * This is the routine the service thread will run, and its Process ID + * once it's running. + */ + vq->service = service; + vq->thread = (pid_t)-1; + + /* Initialize the configuration. */ + vq->config.num = num_descs; + vq->config.irq = devices.next_irq++; + vq->config.pfn = to_guest_phys(p) / getpagesize(); + + /* Initialize the vring. */ + vring_init(&vq->vring, num_descs, p, LGUEST_VRING_ALIGN); + + /* + * Append virtqueue to this device's descriptor. We use + * device_config() to get the end of the device's current virtqueues; + * we check that we haven't added any config or feature information + * yet, otherwise we'd be overwriting them. + */ + assert(dev->desc->config_len == 0 && dev->desc->feature_len == 0); + memcpy(device_config(dev), &vq->config, sizeof(vq->config)); + dev->num_vq++; + dev->desc->num_vq++; + + verbose("Virtqueue page %#lx\n", to_guest_phys(p)); + + /* + * Add to tail of list, so dev->vq is first vq, dev->vq->next is + * second. + */ + for (i = &dev->vq; *i; i = &(*i)->next); + *i = vq; +} + +/* + * The first half of the feature bitmask is for us to advertise features. The + * second half is for the Guest to accept features. + */ +static void add_feature(struct device *dev, unsigned bit) +{ + u8 *features = get_feature_bits(dev); + + /* We can't extend the feature bits once we've added config bytes */ + if (dev->desc->feature_len <= bit / CHAR_BIT) { + assert(dev->desc->config_len == 0); + dev->feature_len = dev->desc->feature_len = (bit/CHAR_BIT) + 1; + } + + features[bit / CHAR_BIT] |= (1 << (bit % CHAR_BIT)); +} + +/* + * This routine sets the configuration fields for an existing device's + * descriptor. It only works for the last device, but that's OK because that's + * how we use it. + */ +static void set_config(struct device *dev, unsigned len, const void *conf) +{ + /* Check we haven't overflowed our single page. */ + if (device_config(dev) + len > devices.descpage + getpagesize()) + errx(1, "Too many devices"); + + /* Copy in the config information, and store the length. */ + memcpy(device_config(dev), conf, len); + dev->desc->config_len = len; + + /* Size must fit in config_len field (8 bits)! */ + assert(dev->desc->config_len == len); +} + +/* + * This routine does all the creation and setup of a new device, including + * calling new_dev_desc() to allocate the descriptor and device memory. We + * don't actually start the service threads until later. + * + * See what I mean about userspace being boring? + */ +static struct device *new_device(const char *name, u16 type) +{ + struct device *dev = malloc(sizeof(*dev)); + + /* Now we populate the fields one at a time. */ + dev->desc = new_dev_desc(type); + dev->name = name; + dev->vq = NULL; + dev->feature_len = 0; + dev->num_vq = 0; + dev->running = false; + + /* + * Append to device list. Prepending to a single-linked list is + * easier, but the user expects the devices to be arranged on the bus + * in command-line order. The first network device on the command line + * is eth0, the first block device /dev/vda, etc. + */ + if (devices.lastdev) + devices.lastdev->next = dev; + else + devices.dev = dev; + devices.lastdev = dev; + + return dev; +} + +/* + * Our first setup routine is the console. It's a fairly simple device, but + * UNIX tty handling makes it uglier than it could be. + */ +static void setup_console(void) +{ + struct device *dev; + + /* If we can save the initial standard input settings... */ + if (tcgetattr(STDIN_FILENO, &orig_term) == 0) { + struct termios term = orig_term; + /* + * Then we turn off echo, line buffering and ^C etc: We want a + * raw input stream to the Guest. + */ + term.c_lflag &= ~(ISIG|ICANON|ECHO); + tcsetattr(STDIN_FILENO, TCSANOW, &term); + } + + dev = new_device("console", VIRTIO_ID_CONSOLE); + + /* We store the console state in dev->priv, and initialize it. */ + dev->priv = malloc(sizeof(struct console_abort)); + ((struct console_abort *)dev->priv)->count = 0; + + /* + * The console needs two virtqueues: the input then the output. When + * they put something the input queue, we make sure we're listening to + * stdin. When they put something in the output queue, we write it to + * stdout. + */ + add_virtqueue(dev, VIRTQUEUE_NUM, console_input); + add_virtqueue(dev, VIRTQUEUE_NUM, console_output); + + verbose("device %u: console\n", ++devices.device_num); +} +/*:*/ + +/*M:010 + * Inter-guest networking is an interesting area. Simplest is to have a + * --sharenet= option which opens or creates a named pipe. This can be + * used to send packets to another guest in a 1:1 manner. + * + * More sopisticated is to use one of the tools developed for project like UML + * to do networking. + * + * Faster is to do virtio bonding in kernel. Doing this 1:1 would be + * completely generic ("here's my vring, attach to your vring") and would work + * for any traffic. Of course, namespace and permissions issues need to be + * dealt with. A more sophisticated "multi-channel" virtio_net.c could hide + * multiple inter-guest channels behind one interface, although it would + * require some manner of hotplugging new virtio channels. + * + * Finally, we could implement a virtio network switch in the kernel. +:*/ + +static u32 str2ip(const char *ipaddr) +{ + unsigned int b[4]; + + if (sscanf(ipaddr, "%u.%u.%u.%u", &b[0], &b[1], &b[2], &b[3]) != 4) + errx(1, "Failed to parse IP address '%s'", ipaddr); + return (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3]; +} + +static void str2mac(const char *macaddr, unsigned char mac[6]) +{ + unsigned int m[6]; + if (sscanf(macaddr, "%02x:%02x:%02x:%02x:%02x:%02x", + &m[0], &m[1], &m[2], &m[3], &m[4], &m[5]) != 6) + errx(1, "Failed to parse mac address '%s'", macaddr); + mac[0] = m[0]; + mac[1] = m[1]; + mac[2] = m[2]; + mac[3] = m[3]; + mac[4] = m[4]; + mac[5] = m[5]; +} + +/* + * This code is "adapted" from libbridge: it attaches the Host end of the + * network device to the bridge device specified by the command line. + * + * This is yet another James Morris contribution (I'm an IP-level guy, so I + * dislike bridging), and I just try not to break it. + */ +static void add_to_bridge(int fd, const char *if_name, const char *br_name) +{ + int ifidx; + struct ifreq ifr; + + if (!*br_name) + errx(1, "must specify bridge name"); + + ifidx = if_nametoindex(if_name); + if (!ifidx) + errx(1, "interface %s does not exist!", if_name); + + strncpy(ifr.ifr_name, br_name, IFNAMSIZ); + ifr.ifr_name[IFNAMSIZ-1] = '\0'; + ifr.ifr_ifindex = ifidx; + if (ioctl(fd, SIOCBRADDIF, &ifr) < 0) + err(1, "can't add %s to bridge %s", if_name, br_name); +} + +/* + * This sets up the Host end of the network device with an IP address, brings + * it up so packets will flow, the copies the MAC address into the hwaddr + * pointer. + */ +static void configure_device(int fd, const char *tapif, u32 ipaddr) +{ + struct ifreq ifr; + struct sockaddr_in sin; + + memset(&ifr, 0, sizeof(ifr)); + strcpy(ifr.ifr_name, tapif); + + /* Don't read these incantations. Just cut & paste them like I did! */ + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = htonl(ipaddr); + memcpy(&ifr.ifr_addr, &sin, sizeof(sin)); + if (ioctl(fd, SIOCSIFADDR, &ifr) != 0) + err(1, "Setting %s interface address", tapif); + ifr.ifr_flags = IFF_UP; + if (ioctl(fd, SIOCSIFFLAGS, &ifr) != 0) + err(1, "Bringing interface %s up", tapif); +} + +static int get_tun_device(char tapif[IFNAMSIZ]) +{ + struct ifreq ifr; + int netfd; + + /* Start with this zeroed. Messy but sure. */ + memset(&ifr, 0, sizeof(ifr)); + + /* + * We open the /dev/net/tun device and tell it we want a tap device. A + * tap device is like a tun device, only somehow different. To tell + * the truth, I completely blundered my way through this code, but it + * works now! + */ + netfd = open_or_die("/dev/net/tun", O_RDWR); + ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; + strcpy(ifr.ifr_name, "tap%d"); + if (ioctl(netfd, TUNSETIFF, &ifr) != 0) + err(1, "configuring /dev/net/tun"); + + if (ioctl(netfd, TUNSETOFFLOAD, + TUN_F_CSUM|TUN_F_TSO4|TUN_F_TSO6|TUN_F_TSO_ECN) != 0) + err(1, "Could not set features for tun device"); + + /* + * We don't need checksums calculated for packets coming in this + * device: trust us! + */ + ioctl(netfd, TUNSETNOCSUM, 1); + + memcpy(tapif, ifr.ifr_name, IFNAMSIZ); + return netfd; +} + +/*L:195 + * Our network is a Host<->Guest network. This can either use bridging or + * routing, but the principle is the same: it uses the "tun" device to inject + * packets into the Host as if they came in from a normal network card. We + * just shunt packets between the Guest and the tun device. + */ +static void setup_tun_net(char *arg) +{ + struct device *dev; + struct net_info *net_info = malloc(sizeof(*net_info)); + int ipfd; + u32 ip = INADDR_ANY; + bool bridging = false; + char tapif[IFNAMSIZ], *p; + struct virtio_net_config conf; + + net_info->tunfd = get_tun_device(tapif); + + /* First we create a new network device. */ + dev = new_device("net", VIRTIO_ID_NET); + dev->priv = net_info; + + /* Network devices need a recv and a send queue, just like console. */ + add_virtqueue(dev, VIRTQUEUE_NUM, net_input); + add_virtqueue(dev, VIRTQUEUE_NUM, net_output); + + /* + * We need a socket to perform the magic network ioctls to bring up the + * tap interface, connect to the bridge etc. Any socket will do! + */ + ipfd = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); + if (ipfd < 0) + err(1, "opening IP socket"); + + /* If the command line was --tunnet=bridge: do bridging. */ + if (!strncmp(BRIDGE_PFX, arg, strlen(BRIDGE_PFX))) { + arg += strlen(BRIDGE_PFX); + bridging = true; + } + + /* A mac address may follow the bridge name or IP address */ + p = strchr(arg, ':'); + if (p) { + str2mac(p+1, conf.mac); + add_feature(dev, VIRTIO_NET_F_MAC); + *p = '\0'; + } + + /* arg is now either an IP address or a bridge name */ + if (bridging) + add_to_bridge(ipfd, tapif, arg); + else + ip = str2ip(arg); + + /* Set up the tun device. */ + configure_device(ipfd, tapif, ip); + + /* Expect Guest to handle everything except UFO */ + add_feature(dev, VIRTIO_NET_F_CSUM); + add_feature(dev, VIRTIO_NET_F_GUEST_CSUM); + add_feature(dev, VIRTIO_NET_F_GUEST_TSO4); + add_feature(dev, VIRTIO_NET_F_GUEST_TSO6); + add_feature(dev, VIRTIO_NET_F_GUEST_ECN); + add_feature(dev, VIRTIO_NET_F_HOST_TSO4); + add_feature(dev, VIRTIO_NET_F_HOST_TSO6); + add_feature(dev, VIRTIO_NET_F_HOST_ECN); + /* We handle indirect ring entries */ + add_feature(dev, VIRTIO_RING_F_INDIRECT_DESC); + set_config(dev, sizeof(conf), &conf); + + /* We don't need the socket any more; setup is done. */ + close(ipfd); + + devices.device_num++; + + if (bridging) + verbose("device %u: tun %s attached to bridge: %s\n", + devices.device_num, tapif, arg); + else + verbose("device %u: tun %s: %s\n", + devices.device_num, tapif, arg); +} +/*:*/ + +/* This hangs off device->priv. */ +struct vblk_info { + /* The size of the file. */ + off64_t len; + + /* The file descriptor for the file. */ + int fd; + +}; + +/*L:210 + * The Disk + * + * The disk only has one virtqueue, so it only has one thread. It is really + * simple: the Guest asks for a block number and we read or write that position + * in the file. + * + * Before we serviced each virtqueue in a separate thread, that was unacceptably + * slow: the Guest waits until the read is finished before running anything + * else, even if it could have been doing useful work. + * + * We could have used async I/O, except it's reputed to suck so hard that + * characters actually go missing from your code when you try to use it. + */ +static void blk_request(struct virtqueue *vq) +{ + struct vblk_info *vblk = vq->dev->priv; + unsigned int head, out_num, in_num, wlen; + int ret; + u8 *in; + struct virtio_blk_outhdr *out; + struct iovec iov[vq->vring.num]; + off64_t off; + + /* + * Get the next request, where we normally wait. It triggers the + * interrupt to acknowledge previously serviced requests (if any). + */ + head = wait_for_vq_desc(vq, iov, &out_num, &in_num); + + /* + * Every block request should contain at least one output buffer + * (detailing the location on disk and the type of request) and one + * input buffer (to hold the result). + */ + if (out_num == 0 || in_num == 0) + errx(1, "Bad virtblk cmd %u out=%u in=%u", + head, out_num, in_num); + + out = convert(&iov[0], struct virtio_blk_outhdr); + in = convert(&iov[out_num+in_num-1], u8); + /* + * For historical reasons, block operations are expressed in 512 byte + * "sectors". + */ + off = out->sector * 512; + + /* + * In general the virtio block driver is allowed to try SCSI commands. + * It'd be nice if we supported eject, for example, but we don't. + */ + if (out->type & VIRTIO_BLK_T_SCSI_CMD) { + fprintf(stderr, "Scsi commands unsupported\n"); + *in = VIRTIO_BLK_S_UNSUPP; + wlen = sizeof(*in); + } else if (out->type & VIRTIO_BLK_T_OUT) { + /* + * Write + * + * Move to the right location in the block file. This can fail + * if they try to write past end. + */ + if (lseek64(vblk->fd, off, SEEK_SET) != off) + err(1, "Bad seek to sector %llu", out->sector); + + ret = writev(vblk->fd, iov+1, out_num-1); + verbose("WRITE to sector %llu: %i\n", out->sector, ret); + + /* + * Grr... Now we know how long the descriptor they sent was, we + * make sure they didn't try to write over the end of the block + * file (possibly extending it). + */ + if (ret > 0 && off + ret > vblk->len) { + /* Trim it back to the correct length */ + ftruncate64(vblk->fd, vblk->len); + /* Die, bad Guest, die. */ + errx(1, "Write past end %llu+%u", off, ret); + } + + wlen = sizeof(*in); + *in = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR); + } else if (out->type & VIRTIO_BLK_T_FLUSH) { + /* Flush */ + ret = fdatasync(vblk->fd); + verbose("FLUSH fdatasync: %i\n", ret); + wlen = sizeof(*in); + *in = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR); + } else { + /* + * Read + * + * Move to the right location in the block file. This can fail + * if they try to read past end. + */ + if (lseek64(vblk->fd, off, SEEK_SET) != off) + err(1, "Bad seek to sector %llu", out->sector); + + ret = readv(vblk->fd, iov+1, in_num-1); + verbose("READ from sector %llu: %i\n", out->sector, ret); + if (ret >= 0) { + wlen = sizeof(*in) + ret; + *in = VIRTIO_BLK_S_OK; + } else { + wlen = sizeof(*in); + *in = VIRTIO_BLK_S_IOERR; + } + } + + /* Finished that request. */ + add_used(vq, head, wlen); +} + +/*L:198 This actually sets up a virtual block device. */ +static void setup_block_file(const char *filename) +{ + struct device *dev; + struct vblk_info *vblk; + struct virtio_blk_config conf; + + /* Creat the device. */ + dev = new_device("block", VIRTIO_ID_BLOCK); + + /* The device has one virtqueue, where the Guest places requests. */ + add_virtqueue(dev, VIRTQUEUE_NUM, blk_request); + + /* Allocate the room for our own bookkeeping */ + vblk = dev->priv = malloc(sizeof(*vblk)); + + /* First we open the file and store the length. */ + vblk->fd = open_or_die(filename, O_RDWR|O_LARGEFILE); + vblk->len = lseek64(vblk->fd, 0, SEEK_END); + + /* We support FLUSH. */ + add_feature(dev, VIRTIO_BLK_F_FLUSH); + + /* Tell Guest how many sectors this device has. */ + conf.capacity = cpu_to_le64(vblk->len / 512); + + /* + * Tell Guest not to put in too many descriptors at once: two are used + * for the in and out elements. + */ + add_feature(dev, VIRTIO_BLK_F_SEG_MAX); + conf.seg_max = cpu_to_le32(VIRTQUEUE_NUM - 2); + + /* Don't try to put whole struct: we have 8 bit limit. */ + set_config(dev, offsetof(struct virtio_blk_config, geometry), &conf); + + verbose("device %u: virtblock %llu sectors\n", + ++devices.device_num, le64_to_cpu(conf.capacity)); +} + +/*L:211 + * Our random number generator device reads from /dev/random into the Guest's + * input buffers. The usual case is that the Guest doesn't want random numbers + * and so has no buffers although /dev/random is still readable, whereas + * console is the reverse. + * + * The same logic applies, however. + */ +struct rng_info { + int rfd; +}; + +static void rng_input(struct virtqueue *vq) +{ + int len; + unsigned int head, in_num, out_num, totlen = 0; + struct rng_info *rng_info = vq->dev->priv; + struct iovec iov[vq->vring.num]; + + /* First we need a buffer from the Guests's virtqueue. */ + head = wait_for_vq_desc(vq, iov, &out_num, &in_num); + if (out_num) + errx(1, "Output buffers in rng?"); + + /* + * Just like the console write, we loop to cover the whole iovec. + * In this case, short reads actually happen quite a bit. + */ + while (!iov_empty(iov, in_num)) { + len = readv(rng_info->rfd, iov, in_num); + if (len <= 0) + err(1, "Read from /dev/random gave %i", len); + iov_consume(iov, in_num, len); + totlen += len; + } + + /* Tell the Guest about the new input. */ + add_used(vq, head, totlen); +} + +/*L:199 + * This creates a "hardware" random number device for the Guest. + */ +static void setup_rng(void) +{ + struct device *dev; + struct rng_info *rng_info = malloc(sizeof(*rng_info)); + + /* Our device's privat info simply contains the /dev/random fd. */ + rng_info->rfd = open_or_die("/dev/random", O_RDONLY); + + /* Create the new device. */ + dev = new_device("rng", VIRTIO_ID_RNG); + dev->priv = rng_info; + + /* The device has one virtqueue, where the Guest places inbufs. */ + add_virtqueue(dev, VIRTQUEUE_NUM, rng_input); + + verbose("device %u: rng\n", devices.device_num++); +} +/* That's the end of device setup. */ + +/*L:230 Reboot is pretty easy: clean up and exec() the Launcher afresh. */ +static void __attribute__((noreturn)) restart_guest(void) +{ + unsigned int i; + + /* + * Since we don't track all open fds, we simply close everything beyond + * stderr. + */ + for (i = 3; i < FD_SETSIZE; i++) + close(i); + + /* Reset all the devices (kills all threads). */ + cleanup_devices(); + + execv(main_args[0], main_args); + err(1, "Could not exec %s", main_args[0]); +} + +/*L:220 + * Finally we reach the core of the Launcher which runs the Guest, serves + * its input and output, and finally, lays it to rest. + */ +static void __attribute__((noreturn)) run_guest(void) +{ + for (;;) { + unsigned long notify_addr; + int readval; + + /* We read from the /dev/lguest device to run the Guest. */ + readval = pread(lguest_fd, ¬ify_addr, + sizeof(notify_addr), cpu_id); + + /* One unsigned long means the Guest did HCALL_NOTIFY */ + if (readval == sizeof(notify_addr)) { + verbose("Notify on address %#lx\n", notify_addr); + handle_output(notify_addr); + /* ENOENT means the Guest died. Reading tells us why. */ + } else if (errno == ENOENT) { + char reason[1024] = { 0 }; + pread(lguest_fd, reason, sizeof(reason)-1, cpu_id); + errx(1, "%s", reason); + /* ERESTART means that we need to reboot the guest */ + } else if (errno == ERESTART) { + restart_guest(); + /* Anything else means a bug or incompatible change. */ + } else + err(1, "Running guest failed"); + } +} +/*L:240 + * This is the end of the Launcher. The good news: we are over halfway + * through! The bad news: the most fiendish part of the code still lies ahead + * of us. + * + * Are you ready? Take a deep breath and join me in the core of the Host, in + * "make Host". +:*/ + +static struct option opts[] = { + { "verbose", 0, NULL, 'v' }, + { "tunnet", 1, NULL, 't' }, + { "block", 1, NULL, 'b' }, + { "rng", 0, NULL, 'r' }, + { "initrd", 1, NULL, 'i' }, + { "username", 1, NULL, 'u' }, + { "chroot", 1, NULL, 'c' }, + { NULL }, +}; +static void usage(void) +{ + errx(1, "Usage: lguest [--verbose] " + "[--tunnet=(:|bridge::)\n" + "|--block=|--initrd=]...\n" + " vmlinux [args...]"); +} + +/*L:105 The main routine is where the real work begins: */ +int main(int argc, char *argv[]) +{ + /* Memory, code startpoint and size of the (optional) initrd. */ + unsigned long mem = 0, start, initrd_size = 0; + /* Two temporaries. */ + int i, c; + /* The boot information for the Guest. */ + struct boot_params *boot; + /* If they specify an initrd file to load. */ + const char *initrd_name = NULL; + + /* Password structure for initgroups/setres[gu]id */ + struct passwd *user_details = NULL; + + /* Directory to chroot to */ + char *chroot_path = NULL; + + /* Save the args: we "reboot" by execing ourselves again. */ + main_args = argv; + + /* + * First we initialize the device list. We keep a pointer to the last + * device, and the next interrupt number to use for devices (1: + * remember that 0 is used by the timer). + */ + devices.lastdev = NULL; + devices.next_irq = 1; + + /* We're CPU 0. In fact, that's the only CPU possible right now. */ + cpu_id = 0; + + /* + * We need to know how much memory so we can set up the device + * descriptor and memory pages for the devices as we parse the command + * line. So we quickly look through the arguments to find the amount + * of memory now. + */ + for (i = 1; i < argc; i++) { + if (argv[i][0] != '-') { + mem = atoi(argv[i]) * 1024 * 1024; + /* + * We start by mapping anonymous pages over all of + * guest-physical memory range. This fills it with 0, + * and ensures that the Guest won't be killed when it + * tries to access it. + */ + guest_base = map_zeroed_pages(mem / getpagesize() + + DEVICE_PAGES); + guest_limit = mem; + guest_max = mem + DEVICE_PAGES*getpagesize(); + devices.descpage = get_pages(1); + break; + } + } + + /* The options are fairly straight-forward */ + while ((c = getopt_long(argc, argv, "v", opts, NULL)) != EOF) { + switch (c) { + case 'v': + verbose = true; + break; + case 't': + setup_tun_net(optarg); + break; + case 'b': + setup_block_file(optarg); + break; + case 'r': + setup_rng(); + break; + case 'i': + initrd_name = optarg; + break; + case 'u': + user_details = getpwnam(optarg); + if (!user_details) + err(1, "getpwnam failed, incorrect username?"); + break; + case 'c': + chroot_path = optarg; + break; + default: + warnx("Unknown argument %s", argv[optind]); + usage(); + } + } + /* + * After the other arguments we expect memory and kernel image name, + * followed by command line arguments for the kernel. + */ + if (optind + 2 > argc) + usage(); + + verbose("Guest base is at %p\n", guest_base); + + /* We always have a console device */ + setup_console(); + + /* Now we load the kernel */ + start = load_kernel(open_or_die(argv[optind+1], O_RDONLY)); + + /* Boot information is stashed at physical address 0 */ + boot = from_guest_phys(0); + + /* Map the initrd image if requested (at top of physical memory) */ + if (initrd_name) { + initrd_size = load_initrd(initrd_name, mem); + /* + * These are the location in the Linux boot header where the + * start and size of the initrd are expected to be found. + */ + boot->hdr.ramdisk_image = mem - initrd_size; + boot->hdr.ramdisk_size = initrd_size; + /* The bootloader type 0xFF means "unknown"; that's OK. */ + boot->hdr.type_of_loader = 0xFF; + } + + /* + * The Linux boot header contains an "E820" memory map: ours is a + * simple, single region. + */ + boot->e820_entries = 1; + boot->e820_map[0] = ((struct e820entry) { 0, mem, E820_RAM }); + /* + * The boot header contains a command line pointer: we put the command + * line after the boot header. + */ + boot->hdr.cmd_line_ptr = to_guest_phys(boot + 1); + /* We use a simple helper to copy the arguments separated by spaces. */ + concat((char *)(boot + 1), argv+optind+2); + + /* Boot protocol version: 2.07 supports the fields for lguest. */ + boot->hdr.version = 0x207; + + /* The hardware_subarch value of "1" tells the Guest it's an lguest. */ + boot->hdr.hardware_subarch = 1; + + /* Tell the entry path not to try to reload segment registers. */ + boot->hdr.loadflags |= KEEP_SEGMENTS; + + /* + * We tell the kernel to initialize the Guest: this returns the open + * /dev/lguest file descriptor. + */ + tell_kernel(start); + + /* Ensure that we terminate if a device-servicing child dies. */ + signal(SIGCHLD, kill_launcher); + + /* If we exit via err(), this kills all the threads, restores tty. */ + atexit(cleanup_devices); + + /* If requested, chroot to a directory */ + if (chroot_path) { + if (chroot(chroot_path) != 0) + err(1, "chroot(\"%s\") failed", chroot_path); + + if (chdir("/") != 0) + err(1, "chdir(\"/\") failed"); + + verbose("chroot done\n"); + } + + /* If requested, drop privileges */ + if (user_details) { + uid_t u; + gid_t g; + + u = user_details->pw_uid; + g = user_details->pw_gid; + + if (initgroups(user_details->pw_name, g) != 0) + err(1, "initgroups failed"); + + if (setresgid(g, g, g) != 0) + err(1, "setresgid failed"); + + if (setresuid(u, u, u) != 0) + err(1, "setresuid failed"); + + verbose("Dropping privileges completed\n"); + } + + /* Finally, run the Guest. This doesn't return. */ + run_guest(); +} +/*:*/ + +/*M:999 + * Mastery is done: you now know everything I do. + * + * But surely you have seen code, features and bugs in your wanderings which + * you now yearn to attack? That is the real game, and I look forward to you + * patching and forking lguest into the Your-Name-Here-visor. + * + * Farewell, and good coding! + * Rusty Russell. + */ diff --git a/Documentation/virtual/lguest/lguest.txt b/Documentation/virtual/lguest/lguest.txt new file mode 100644 index 000000000000..bff0c554485d --- /dev/null +++ b/Documentation/virtual/lguest/lguest.txt @@ -0,0 +1,129 @@ + __ + (___()'`; Rusty's Remarkably Unreliable Guide to Lguest + /, /` - or, A Young Coder's Illustrated Hypervisor + \\"--\\ http://lguest.ozlabs.org + +Lguest is designed to be a minimal 32-bit x86 hypervisor for the Linux kernel, +for Linux developers and users to experiment with virtualization with the +minimum of complexity. Nonetheless, it should have sufficient features to +make it useful for specific tasks, and, of course, you are encouraged to fork +and enhance it (see drivers/lguest/README). + +Features: + +- Kernel module which runs in a normal kernel. +- Simple I/O model for communication. +- Simple program to create new guests. +- Logo contains cute puppies: http://lguest.ozlabs.org + +Developer features: + +- Fun to hack on. +- No ABI: being tied to a specific kernel anyway, you can change anything. +- Many opportunities for improvement or feature implementation. + +Running Lguest: + +- The easiest way to run lguest is to use same kernel as guest and host. + You can configure them differently, but usually it's easiest not to. + + You will need to configure your kernel with the following options: + + "General setup": + "Prompt for development and/or incomplete code/drivers" = Y + (CONFIG_EXPERIMENTAL=y) + + "Processor type and features": + "Paravirtualized guest support" = Y + "Lguest guest support" = Y + "High Memory Support" = off/4GB + "Alignment value to which kernel should be aligned" = 0x100000 + (CONFIG_PARAVIRT=y, CONFIG_LGUEST_GUEST=y, CONFIG_HIGHMEM64G=n and + CONFIG_PHYSICAL_ALIGN=0x100000) + + "Device Drivers": + "Block devices" + "Virtio block driver (EXPERIMENTAL)" = M/Y + "Network device support" + "Universal TUN/TAP device driver support" = M/Y + "Virtio network driver (EXPERIMENTAL)" = M/Y + (CONFIG_VIRTIO_BLK=m, CONFIG_VIRTIO_NET=m and CONFIG_TUN=m) + + "Virtualization" + "Linux hypervisor example code" = M/Y + (CONFIG_LGUEST=m) + +- A tool called "lguest" is available in this directory: type "make" + to build it. If you didn't build your kernel in-tree, use "make + O=". + +- Create or find a root disk image. There are several useful ones + around, such as the xm-test tiny root image at + http://xm-test.xensource.com/ramdisks/initrd-1.1-i386.img + + For more serious work, I usually use a distribution ISO image and + install it under qemu, then make multiple copies: + + dd if=/dev/zero of=rootfile bs=1M count=2048 + qemu -cdrom image.iso -hda rootfile -net user -net nic -boot d + + Make sure that you install a getty on /dev/hvc0 if you want to log in on the + console! + +- "modprobe lg" if you built it as a module. + +- Run an lguest as root: + + Documentation/virtual/lguest/lguest 64 vmlinux --tunnet=192.168.19.1 \ + --block=rootfile root=/dev/vda + + Explanation: + 64: the amount of memory to use, in MB. + + vmlinux: the kernel image found in the top of your build directory. You + can also use a standard bzImage. + + --tunnet=192.168.19.1: configures a "tap" device for networking with this + IP address. + + --block=rootfile: a file or block device which becomes /dev/vda + inside the guest. + + root=/dev/vda: this (and anything else on the command line) are + kernel boot parameters. + +- Configuring networking. I usually have the host masquerade, using + "iptables -t nat -A POSTROUTING -o eth0 -j MASQUERADE" and "echo 1 > + /proc/sys/net/ipv4/ip_forward". In this example, I would configure + eth0 inside the guest at 192.168.19.2. + + Another method is to bridge the tap device to an external interface + using --tunnet=bridge:, and perhaps run dhcp on the guest + to obtain an IP address. The bridge needs to be configured first: + this option simply adds the tap interface to it. + + A simple example on my system: + + ifconfig eth0 0.0.0.0 + brctl addbr lg0 + ifconfig lg0 up + brctl addif lg0 eth0 + dhclient lg0 + + Then use --tunnet=bridge:lg0 when launching the guest. + + See: + + http://www.linuxfoundation.org/collaborate/workgroups/networking/bridge + + for general information on how to get bridging to work. + +- Random number generation. Using the --rng option will provide a + /dev/hwrng in the guest that will read from the host's /dev/random. + Use this option in conjunction with rng-tools (see ../hw_random.txt) + to provide entropy to the guest kernel's /dev/random. + +There is a helpful mailing list at http://ozlabs.org/mailman/listinfo/lguest + +Good luck! +Rusty Russell rusty@rustcorp.com.au. diff --git a/Documentation/virtual/uml/UserModeLinux-HOWTO.txt b/Documentation/virtual/uml/UserModeLinux-HOWTO.txt new file mode 100644 index 000000000000..5d0fc8bfcdb9 --- /dev/null +++ b/Documentation/virtual/uml/UserModeLinux-HOWTO.txt @@ -0,0 +1,4589 @@ + User Mode Linux HOWTO + User Mode Linux Core Team + Mon Nov 18 14:16:16 EST 2002 + + This document describes the use and abuse of Jeff Dike's User Mode + Linux: a port of the Linux kernel as a normal Intel Linux process. + ______________________________________________________________________ + + Table of Contents + + 1. Introduction + + 1.1 How is User Mode Linux Different? + 1.2 Why Would I Want User Mode Linux? + + 2. Compiling the kernel and modules + + 2.1 Compiling the kernel + 2.2 Compiling and installing kernel modules + 2.3 Compiling and installing uml_utilities + + 3. Running UML and logging in + + 3.1 Running UML + 3.2 Logging in + 3.3 Examples + + 4. UML on 2G/2G hosts + + 4.1 Introduction + 4.2 The problem + 4.3 The solution + + 5. Setting up serial lines and consoles + + 5.1 Specifying the device + 5.2 Specifying the channel + 5.3 Examples + + 6. Setting up the network + + 6.1 General setup + 6.2 Userspace daemons + 6.3 Specifying ethernet addresses + 6.4 UML interface setup + 6.5 Multicast + 6.6 TUN/TAP with the uml_net helper + 6.7 TUN/TAP with a preconfigured tap device + 6.8 Ethertap + 6.9 The switch daemon + 6.10 Slip + 6.11 Slirp + 6.12 pcap + 6.13 Setting up the host yourself + + 7. Sharing Filesystems between Virtual Machines + + 7.1 A warning + 7.2 Using layered block devices + 7.3 Note! + 7.4 Another warning + 7.5 uml_moo : Merging a COW file with its backing file + + 8. Creating filesystems + + 8.1 Create the filesystem file + 8.2 Assign the file to a UML device + 8.3 Creating and mounting the filesystem + + 9. Host file access + + 9.1 Using hostfs + 9.2 hostfs as the root filesystem + 9.3 Building hostfs + + 10. The Management Console + 10.1 version + 10.2 halt and reboot + 10.3 config + 10.4 remove + 10.5 sysrq + 10.6 help + 10.7 cad + 10.8 stop + 10.9 go + + 11. Kernel debugging + + 11.1 Starting the kernel under gdb + 11.2 Examining sleeping processes + 11.3 Running ddd on UML + 11.4 Debugging modules + 11.5 Attaching gdb to the kernel + 11.6 Using alternate debuggers + + 12. Kernel debugging examples + + 12.1 The case of the hung fsck + 12.2 Episode 2: The case of the hung fsck + + 13. What to do when UML doesn't work + + 13.1 Strange compilation errors when you build from source + 13.2 (obsolete) + 13.3 A variety of panics and hangs with /tmp on a reiserfs filesystem + 13.4 The compile fails with errors about conflicting types for 'open', 'dup', and 'waitpid' + 13.5 UML doesn't work when /tmp is an NFS filesystem + 13.6 UML hangs on boot when compiled with gprof support + 13.7 syslogd dies with a SIGTERM on startup + 13.8 TUN/TAP networking doesn't work on a 2.4 host + 13.9 You can network to the host but not to other machines on the net + 13.10 I have no root and I want to scream + 13.11 UML build conflict between ptrace.h and ucontext.h + 13.12 The UML BogoMips is exactly half the host's BogoMips + 13.13 When you run UML, it immediately segfaults + 13.14 xterms appear, then immediately disappear + 13.15 Any other panic, hang, or strange behavior + + 14. Diagnosing Problems + + 14.1 Case 1 : Normal kernel panics + 14.2 Case 2 : Tracing thread panics + 14.3 Case 3 : Tracing thread panics caused by other threads + 14.4 Case 4 : Hangs + + 15. Thanks + + 15.1 Code and Documentation + 15.2 Flushing out bugs + 15.3 Buglets and clean-ups + 15.4 Case Studies + 15.5 Other contributions + + + ______________________________________________________________________ + + 11.. IInnttrroodduuccttiioonn + + Welcome to User Mode Linux. It's going to be fun. + + + + 11..11.. HHooww iiss UUsseerr MMooddee LLiinnuuxx DDiiffffeerreenntt?? + + Normally, the Linux Kernel talks straight to your hardware (video + card, keyboard, hard drives, etc), and any programs which run ask the + kernel to operate the hardware, like so: + + + + +-----------+-----------+----+ + | Process 1 | Process 2 | ...| + +-----------+-----------+----+ + | Linux Kernel | + +----------------------------+ + | Hardware | + +----------------------------+ + + + + + The User Mode Linux Kernel is different; instead of talking to the + hardware, it talks to a `real' Linux kernel (called the `host kernel' + from now on), like any other program. Programs can then run inside + User-Mode Linux as if they were running under a normal kernel, like + so: + + + + +----------------+ + | Process 2 | ...| + +-----------+----------------+ + | Process 1 | User-Mode Linux| + +----------------------------+ + | Linux Kernel | + +----------------------------+ + | Hardware | + +----------------------------+ + + + + + + 11..22.. WWhhyy WWoouulldd II WWaanntt UUsseerr MMooddee LLiinnuuxx?? + + + 1. If User Mode Linux crashes, your host kernel is still fine. + + 2. You can run a usermode kernel as a non-root user. + + 3. You can debug the User Mode Linux like any normal process. + + 4. You can run gprof (profiling) and gcov (coverage testing). + + 5. You can play with your kernel without breaking things. + + 6. You can use it as a sandbox for testing new apps. + + 7. You can try new development kernels safely. + + 8. You can run different distributions simultaneously. + + 9. It's extremely fun. + + + + + + 22.. CCoommppiilliinngg tthhee kkeerrnneell aanndd mmoodduulleess + + + + + 22..11.. CCoommppiilliinngg tthhee kkeerrnneell + + + Compiling the user mode kernel is just like compiling any other + kernel. Let's go through the steps, using 2.4.0-prerelease (current + as of this writing) as an example: + + + 1. Download the latest UML patch from + + the download page + . + + + 3. Make a directory and unpack the kernel into it. + + + + host% + mkdir ~/uml + + + + + + + host% + cd ~/uml + + + + + + + host% + tar -xzvf linux-2.4.0-prerelease.tar.bz2 + + + + + + + 4. Apply the patch using + + + + host% + cd ~/uml/linux + + + + host% + bzcat uml-patch-2.4.0-prerelease.bz2 | patch -p1 + + + + + + + 5. Run your favorite config; `make xconfig ARCH=um' is the most + convenient. `make config ARCH=um' and 'make menuconfig ARCH=um' + will work as well. The defaults will give you a useful kernel. If + you want to change something, go ahead, it probably won't hurt + anything. + + + Note: If the host is configured with a 2G/2G address space split + rather than the usual 3G/1G split, then the packaged UML binaries + will not run. They will immediately segfault. See ``UML on 2G/2G + hosts'' for the scoop on running UML on your system. + + + + 6. Finish with `make linux ARCH=um': the result is a file called + `linux' in the top directory of your source tree. + + Make sure that you don't build this kernel in /usr/src/linux. On some + distributions, /usr/include/asm is a link into this pool. The user- + mode build changes the other end of that link, and things that include + stop compiling. + + The sources are also available from cvs at the project's cvs page, + which has directions on getting the sources. You can also browse the + CVS pool from there. + + If you get the CVS sources, you will have to check them out into an + empty directory. You will then have to copy each file into the + corresponding directory in the appropriate kernel pool. + + If you don't have the latest kernel pool, you can get the + corresponding user-mode sources with + + + host% cvs co -r v_2_3_x linux + + + + + where 'x' is the version in your pool. Note that you will not get the + bug fixes and enhancements that have gone into subsequent releases. + + + 22..22.. CCoommppiilliinngg aanndd iinnssttaalllliinngg kkeerrnneell mmoodduulleess + + UML modules are built in the same way as the native kernel (with the + exception of the 'ARCH=um' that you always need for UML): + + + host% make modules ARCH=um + + + + + Any modules that you want to load into this kernel need to be built in + the user-mode pool. Modules from the native kernel won't work. + + You can install them by using ftp or something to copy them into the + virtual machine and dropping them into /lib/modules/`uname -r`. + + You can also get the kernel build process to install them as follows: + + 1. with the kernel not booted, mount the root filesystem in the top + level of the kernel pool: + + + host% mount root_fs mnt -o loop + + + + + + + 2. run + + + host% + make modules_install INSTALL_MOD_PATH=`pwd`/mnt ARCH=um + + + + + + + 3. unmount the filesystem + + + host% umount mnt + + + + + + + 4. boot the kernel on it + + + When the system is booted, you can use insmod as usual to get the + modules into the kernel. A number of things have been loaded into UML + as modules, especially filesystems and network protocols and filters, + so most symbols which need to be exported probably already are. + However, if you do find symbols that need exporting, let us + know, and + they'll be "taken care of". + + + + 22..33.. CCoommppiilliinngg aanndd iinnssttaalllliinngg uummll__uuttiilliittiieess + + Many features of the UML kernel require a user-space helper program, + so a uml_utilities package is distributed separately from the kernel + patch which provides these helpers. Included within this is: + + +o port-helper - Used by consoles which connect to xterms or ports + + +o tunctl - Configuration tool to create and delete tap devices + + +o uml_net - Setuid binary for automatic tap device configuration + + +o uml_switch - User-space virtual switch required for daemon + transport + + The uml_utilities tree is compiled with: + + + host# + make && make install + + + + + Note that UML kernel patches may require a specific version of the + uml_utilities distribution. If you don't keep up with the mailing + lists, ensure that you have the latest release of uml_utilities if you + are experiencing problems with your UML kernel, particularly when + dealing with consoles or command-line switches to the helper programs + + + + + + + + + 33.. RRuunnnniinngg UUMMLL aanndd llooggggiinngg iinn + + + + 33..11.. RRuunnnniinngg UUMMLL + + It runs on 2.2.15 or later, and all 2.4 kernels. + + + Booting UML is straightforward. Simply run 'linux': it will try to + mount the file `root_fs' in the current directory. You do not need to + run it as root. If your root filesystem is not named `root_fs', then + you need to put a `ubd0=root_fs_whatever' switch on the linux command + line. + + + You will need a filesystem to boot UML from. There are a number + available for download from here . There are also several tools + which can be + used to generate UML-compatible filesystem images from media. + The kernel will boot up and present you with a login prompt. + + + Note: If the host is configured with a 2G/2G address space split + rather than the usual 3G/1G split, then the packaged UML binaries will + not run. They will immediately segfault. See ``UML on 2G/2G hosts'' + for the scoop on running UML on your system. + + + + 33..22.. LLooggggiinngg iinn + + + + The prepackaged filesystems have a root account with password 'root' + and a user account with password 'user'. The login banner will + generally tell you how to log in. So, you log in and you will find + yourself inside a little virtual machine. Our filesystems have a + variety of commands and utilities installed (and it is fairly easy to + add more), so you will have a lot of tools with which to poke around + the system. + + There are a couple of other ways to log in: + + +o On a virtual console + + + + Each virtual console that is configured (i.e. the device exists in + /dev and /etc/inittab runs a getty on it) will come up in its own + xterm. If you get tired of the xterms, read ``Setting up serial + lines and consoles'' to see how to attach the consoles to + something else, like host ptys. + + + + +o Over the serial line + + + In the boot output, find a line that looks like: + + + + serial line 0 assigned pty /dev/ptyp1 + + + + + Attach your favorite terminal program to the corresponding tty. I.e. + for minicom, the command would be + + + host% minicom -o -p /dev/ttyp1 + + + + + + + +o Over the net + + + If the network is running, then you can telnet to the virtual + machine and log in to it. See ``Setting up the network'' to learn + about setting up a virtual network. + + When you're done using it, run halt, and the kernel will bring itself + down and the process will exit. + + + 33..33.. EExxaammpplleess + + Here are some examples of UML in action: + + +o A login session + + +o A virtual network + + + + + + + + 44.. UUMMLL oonn 22GG//22GG hhoossttss + + + + + 44..11.. IInnttrroodduuccttiioonn + + + Most Linux machines are configured so that the kernel occupies the + upper 1G (0xc0000000 - 0xffffffff) of the 4G address space and + processes use the lower 3G (0x00000000 - 0xbfffffff). However, some + machine are configured with a 2G/2G split, with the kernel occupying + the upper 2G (0x80000000 - 0xffffffff) and processes using the lower + 2G (0x00000000 - 0x7fffffff). + + + + + 44..22.. TThhee pprroobblleemm + + + The prebuilt UML binaries on this site will not run on 2G/2G hosts + because UML occupies the upper .5G of the 3G process address space + (0xa0000000 - 0xbfffffff). Obviously, on 2G/2G hosts, this is right + in the middle of the kernel address space, so UML won't even load - it + will immediately segfault. + + + + + 44..33.. TThhee ssoolluuttiioonn + + + The fix for this is to rebuild UML from source after enabling + CONFIG_HOST_2G_2G (under 'General Setup'). This will cause UML to + load itself in the top .5G of that smaller process address space, + where it will run fine. See ``Compiling the kernel and modules'' if + you need help building UML from source. + + + + + + + + + + + 55.. SSeettttiinngg uupp sseerriiaall lliinneess aanndd ccoonnssoolleess + + + It is possible to attach UML serial lines and consoles to many types + of host I/O channels by specifying them on the command line. + + + You can attach them to host ptys, ttys, file descriptors, and ports. + This allows you to do things like + + +o have a UML console appear on an unused host console, + + +o hook two virtual machines together by having one attach to a pty + and having the other attach to the corresponding tty + + +o make a virtual machine accessible from the net by attaching a + console to a port on the host. + + + The general format of the command line option is device=channel. + + + + 55..11.. SSppeecciiffyyiinngg tthhee ddeevviiccee + + Devices are specified with "con" or "ssl" (console or serial line, + respectively), optionally with a device number if you are talking + about a specific device. + + + Using just "con" or "ssl" describes all of the consoles or serial + lines. If you want to talk about console #3 or serial line #10, they + would be "con3" and "ssl10", respectively. + + + A specific device name will override a less general "con=" or "ssl=". + So, for example, you can assign a pty to each of the serial lines + except for the first two like this: + + + ssl=pty ssl0=tty:/dev/tty0 ssl1=tty:/dev/tty1 + + + + + The specificity of the device name is all that matters; order on the + command line is irrelevant. + + + + 55..22.. SSppeecciiffyyiinngg tthhee cchhaannnneell + + There are a number of different types of channels to attach a UML + device to, each with a different way of specifying exactly what to + attach to. + + +o pseudo-terminals - device=pty pts terminals - device=pts + + + This will cause UML to allocate a free host pseudo-terminal for the + device. The terminal that it got will be announced in the boot + log. You access it by attaching a terminal program to the + corresponding tty: + + +o screen /dev/pts/n + + +o screen /dev/ttyxx + + +o minicom -o -p /dev/ttyxx - minicom seems not able to handle pts + devices + + +o kermit - start it up, 'open' the device, then 'connect' + + + + + + +o terminals - device=tty:tty device file + + + This will make UML attach the device to the specified tty (i.e + + + con1=tty:/dev/tty3 + + + + + will attach UML's console 1 to the host's /dev/tty3). If the tty that + you specify is the slave end of a tty/pty pair, something else must + have already opened the corresponding pty in order for this to work. + + + + + + +o xterms - device=xterm + + + UML will run an xterm and the device will be attached to it. + + + + + + +o Port - device=port:port number + + + This will attach the UML devices to the specified host port. + Attaching console 1 to the host's port 9000 would be done like + this: + + + con1=port:9000 + + + + + Attaching all the serial lines to that port would be done similarly: + + + ssl=port:9000 + + + + + You access these devices by telnetting to that port. Each active tel- + net session gets a different device. If there are more telnets to a + port than UML devices attached to it, then the extra telnet sessions + will block until an existing telnet detaches, or until another device + becomes active (i.e. by being activated in /etc/inittab). + + This channel has the advantage that you can both attach multiple UML + devices to it and know how to access them without reading the UML boot + log. It is also unique in allowing access to a UML from remote + machines without requiring that the UML be networked. This could be + useful in allowing public access to UMLs because they would be + accessible from the net, but wouldn't need any kind of network + filtering or access control because they would have no network access. + + + If you attach the main console to a portal, then the UML boot will + appear to hang. In reality, it's waiting for a telnet to connect, at + which point the boot will proceed. + + + + + + +o already-existing file descriptors - device=file descriptor + + + If you set up a file descriptor on the UML command line, you can + attach a UML device to it. This is most commonly used to put the + main console back on stdin and stdout after assigning all the other + consoles to something else: + + + con0=fd:0,fd:1 con=pts + + + + + + + + + +o Nothing - device=null + + + This allows the device to be opened, in contrast to 'none', but + reads will block, and writes will succeed and the data will be + thrown out. + + + + + + +o None - device=none + + + This causes the device to disappear. + + + + You can also specify different input and output channels for a device + by putting a comma between them: + + + ssl3=tty:/dev/tty2,xterm + + + + + will cause serial line 3 to accept input on the host's /dev/tty3 and + display output on an xterm. That's a silly example - the most common + use of this syntax is to reattach the main console to stdin and stdout + as shown above. + + + If you decide to move the main console away from stdin/stdout, the + initial boot output will appear in the terminal that you're running + UML in. However, once the console driver has been officially + initialized, then the boot output will start appearing wherever you + specified that console 0 should be. That device will receive all + subsequent output. + + + + 55..33.. EExxaammpplleess + + There are a number of interesting things you can do with this + capability. + + + First, this is how you get rid of those bleeding console xterms by + attaching them to host ptys: + + + con=pty con0=fd:0,fd:1 + + + + + This will make a UML console take over an unused host virtual console, + so that when you switch to it, you will see the UML login prompt + rather than the host login prompt: + + + con1=tty:/dev/tty6 + + + + + You can attach two virtual machines together with what amounts to a + serial line as follows: + + Run one UML with a serial line attached to a pty - + + + ssl1=pty + + + + + Look at the boot log to see what pty it got (this example will assume + that it got /dev/ptyp1). + + Boot the other UML with a serial line attached to the corresponding + tty - + + + ssl1=tty:/dev/ttyp1 + + + + + Log in, make sure that it has no getty on that serial line, attach a + terminal program like minicom to it, and you should see the login + prompt of the other virtual machine. + + + 66.. SSeettttiinngg uupp tthhee nneettwwoorrkk + + + + This page describes how to set up the various transports and to + provide a UML instance with network access to the host, other machines + on the local net, and the rest of the net. + + + As of 2.4.5, UML networking has been completely redone to make it much + easier to set up, fix bugs, and add new features. + + + There is a new helper, uml_net, which does the host setup that + requires root privileges. + + + There are currently five transport types available for a UML virtual + machine to exchange packets with other hosts: + + +o ethertap + + +o TUN/TAP + + +o Multicast + + +o a switch daemon + + +o slip + + +o slirp + + +o pcap + + The TUN/TAP, ethertap, slip, and slirp transports allow a UML + instance to exchange packets with the host. They may be directed + to the host or the host may just act as a router to provide access + to other physical or virtual machines. + + + The pcap transport is a synthetic read-only interface, using the + libpcap binary to collect packets from interfaces on the host and + filter them. This is useful for building preconfigured traffic + monitors or sniffers. + + + The daemon and multicast transports provide a completely virtual + network to other virtual machines. This network is completely + disconnected from the physical network unless one of the virtual + machines on it is acting as a gateway. + + + With so many host transports, which one should you use? Here's when + you should use each one: + + +o ethertap - if you want access to the host networking and it is + running 2.2 + + +o TUN/TAP - if you want access to the host networking and it is + running 2.4. Also, the TUN/TAP transport is able to use a + preconfigured device, allowing it to avoid using the setuid uml_net + helper, which is a security advantage. + + +o Multicast - if you want a purely virtual network and you don't want + to set up anything but the UML + + +o a switch daemon - if you want a purely virtual network and you + don't mind running the daemon in order to get somewhat better + performance + + +o slip - there is no particular reason to run the slip backend unless + ethertap and TUN/TAP are just not available for some reason + + +o slirp - if you don't have root access on the host to setup + networking, or if you don't want to allocate an IP to your UML + + +o pcap - not much use for actual network connectivity, but great for + monitoring traffic on the host + + Ethertap is available on 2.4 and works fine. TUN/TAP is preferred + to it because it has better performance and ethertap is officially + considered obsolete in 2.4. Also, the root helper only needs to + run occasionally for TUN/TAP, rather than handling every packet, as + it does with ethertap. This is a slight security advantage since + it provides fewer opportunities for a nasty UML user to somehow + exploit the helper's root privileges. + + + 66..11.. GGeenneerraall sseettuupp + + First, you must have the virtual network enabled in your UML. If are + running a prebuilt kernel from this site, everything is already + enabled. If you build the kernel yourself, under the "Network device + support" menu, enable "Network device support", and then the three + transports. + + + The next step is to provide a network device to the virtual machine. + This is done by describing it on the kernel command line. + + The general format is + + + eth = , + + + + + For example, a virtual ethernet device may be attached to a host + ethertap device as follows: + + + eth0=ethertap,tap0,fe:fd:0:0:0:1,192.168.0.254 + + + + + This sets up eth0 inside the virtual machine to attach itself to the + host /dev/tap0, assigns it an ethernet address, and assigns the host + tap0 interface an IP address. + + + + Note that the IP address you assign to the host end of the tap device + must be different than the IP you assign to the eth device inside UML. + If you are short on IPs and don't want to consume two per UML, then + you can reuse the host's eth IP address for the host ends of the tap + devices. Internally, the UMLs must still get unique IPs for their eth + devices. You can also give the UMLs non-routable IPs (192.168.x.x or + 10.x.x.x) and have the host masquerade them. This will let outgoing + connections work, but incoming connections won't without more work, + such as port forwarding from the host. + Also note that when you configure the host side of an interface, it is + only acting as a gateway. It will respond to pings sent to it + locally, but is not useful to do that since it's a host interface. + You are not talking to the UML when you ping that interface and get a + response. + + + You can also add devices to a UML and remove them at runtime. See the + ``The Management Console'' page for details. + + + The sections below describe this in more detail. + + + Once you've decided how you're going to set up the devices, you boot + UML, log in, configure the UML side of the devices, and set up routes + to the outside world. At that point, you will be able to talk to any + other machines, physical or virtual, on the net. + + + If ifconfig inside UML fails and the network refuses to come up, run + tell you what went wrong. + + + + 66..22.. UUsseerrssppaaccee ddaaeemmoonnss + + You will likely need the setuid helper, or the switch daemon, or both. + They are both installed with the RPM and deb, so if you've installed + either, you can skip the rest of this section. + + + If not, then you need to check them out of CVS, build them, and + install them. The helper is uml_net, in CVS /tools/uml_net, and the + daemon is uml_switch, in CVS /tools/uml_router. They are both built + with a plain 'make'. Both need to be installed in a directory that's + in your path - /usr/bin is recommend. On top of that, uml_net needs + to be setuid root. + + + + 66..33.. SSppeecciiffyyiinngg eetthheerrnneett aaddddrreesssseess + + Below, you will see that the TUN/TAP, ethertap, and daemon interfaces + allow you to specify hardware addresses for the virtual ethernet + devices. This is generally not necessary. If you don't have a + specific reason to do it, you probably shouldn't. If one is not + specified on the command line, the driver will assign one based on the + device IP address. It will provide the address fe:fd:nn:nn:nn:nn + where nn.nn.nn.nn is the device IP address. This is nearly always + sufficient to guarantee a unique hardware address for the device. A + couple of exceptions are: + + +o Another set of virtual ethernet devices are on the same network and + they are assigned hardware addresses using a different scheme which + may conflict with the UML IP address-based scheme + + +o You aren't going to use the device for IP networking, so you don't + assign the device an IP address + + If you let the driver provide the hardware address, you should make + sure that the device IP address is known before the interface is + brought up. So, inside UML, this will guarantee that: + + + + UML# + ifconfig eth0 192.168.0.250 up + + + + + If you decide to assign the hardware address yourself, make sure that + the first byte of the address is even. Addresses with an odd first + byte are broadcast addresses, which you don't want assigned to a + device. + + + + 66..44.. UUMMLL iinntteerrffaaccee sseettuupp + + Once the network devices have been described on the command line, you + should boot UML and log in. + + + The first thing to do is bring the interface up: + + + UML# ifconfig ethn ip-address up + + + + + You should be able to ping the host at this point. + + + To reach the rest of the world, you should set a default route to the + host: + + + UML# route add default gw host ip + + + + + Again, with host ip of 192.168.0.4: + + + UML# route add default gw 192.168.0.4 + + + + + This page used to recommend setting a network route to your local net. + This is wrong, because it will cause UML to try to figure out hardware + addresses of the local machines by arping on the interface to the + host. Since that interface is basically a single strand of ethernet + with two nodes on it (UML and the host) and arp requests don't cross + networks, they will fail to elicit any responses. So, what you want + is for UML to just blindly throw all packets at the host and let it + figure out what to do with them, which is what leaving out the network + route and adding the default route does. + + + Note: If you can't communicate with other hosts on your physical + ethernet, it's probably because of a network route that's + automatically set up. If you run 'route -n' and see a route that + looks like this: + + + + + Destination Gateway Genmask Flags Metric Ref Use Iface + 192.168.0.0 0.0.0.0 255.255.255.0 U 0 0 0 eth0 + + + + + with a mask that's not 255.255.255.255, then replace it with a route + to your host: + + + UML# + route del -net 192.168.0.0 dev eth0 netmask 255.255.255.0 + + + + + + + UML# + route add -host 192.168.0.4 dev eth0 + + + + + This, plus the default route to the host, will allow UML to exchange + packets with any machine on your ethernet. + + + + 66..55.. MMuullttiiccaasstt + + The simplest way to set up a virtual network between multiple UMLs is + to use the mcast transport. This was written by Harald Welte and is + present in UML version 2.4.5-5um and later. Your system must have + multicast enabled in the kernel and there must be a multicast-capable + network device on the host. Normally, this is eth0, but if there is + no ethernet card on the host, then you will likely get strange error + messages when you bring the device up inside UML. + + + To use it, run two UMLs with + + + eth0=mcast + + + + + on their command lines. Log in, configure the ethernet device in each + machine with different IP addresses: + + + UML1# ifconfig eth0 192.168.0.254 + + + + + + + UML2# ifconfig eth0 192.168.0.253 + + + + + and they should be able to talk to each other. + + The full set of command line options for this transport are + + + + ethn=mcast,ethernet address,multicast + address,multicast port,ttl + + + + + Harald's original README is here and explains these in detail, as well as + some other issues. + + There is also a related point-to-point only "ucast" transport. + This is useful when your network does not support multicast, and + all network connections are simple point to point links. + + The full set of command line options for this transport are + + + ethn=ucast,ethernet address,remote address,listen port,remote port + + + + + 66..66.. TTUUNN//TTAAPP wwiitthh tthhee uummll__nneett hheellppeerr + + TUN/TAP is the preferred mechanism on 2.4 to exchange packets with the + host. The TUN/TAP backend has been in UML since 2.4.9-3um. + + + The easiest way to get up and running is to let the setuid uml_net + helper do the host setup for you. This involves insmod-ing the tun.o + module if necessary, configuring the device, and setting up IP + forwarding, routing, and proxy arp. If you are new to UML networking, + do this first. If you're concerned about the security implications of + the setuid helper, use it to get up and running, then read the next + section to see how to have UML use a preconfigured tap device, which + avoids the use of uml_net. + + + If you specify an IP address for the host side of the device, the + uml_net helper will do all necessary setup on the host - the only + requirement is that TUN/TAP be available, either built in to the host + kernel or as the tun.o module. + + The format of the command line switch to attach a device to a TUN/TAP + device is + + + eth =tuntap,,, + + + + + For example, this argument will attach the UML's eth0 to the next + available tap device and assign an ethernet address to it based on its + IP address + + + eth0=tuntap,,,192.168.0.254 + + + + + + + Note that the IP address that must be used for the eth device inside + UML is fixed by the routing and proxy arp that is set up on the + TUN/TAP device on the host. You can use a different one, but it won't + work because reply packets won't reach the UML. This is a feature. + It prevents a nasty UML user from doing things like setting the UML IP + to the same as the network's nameserver or mail server. + + + There are a couple potential problems with running the TUN/TAP + transport on a 2.4 host kernel + + +o TUN/TAP seems not to work on 2.4.3 and earlier. Upgrade the host + kernel or use the ethertap transport. + + +o With an upgraded kernel, TUN/TAP may fail with + + + File descriptor in bad state + + + + + This is due to a header mismatch between the upgraded kernel and the + kernel that was originally installed on the machine. The fix is to + make sure that /usr/src/linux points to the headers for the running + kernel. + + These were pointed out by Tim Robinson in + name="this uml- + user post"> . + + + + 66..77.. TTUUNN//TTAAPP wwiitthh aa pprreeccoonnffiigguurreedd ttaapp ddeevviiccee + + If you prefer not to have UML use uml_net (which is somewhat + insecure), with UML 2.4.17-11, you can set up a TUN/TAP device + beforehand. The setup needs to be done as root, but once that's done, + there is no need for root assistance. Setting up the device is done + as follows: + + +o Create the device with tunctl (available from the UML utilities + tarball) + + + + + host# tunctl -u uid + + + + + where uid is the user id or username that UML will be run as. This + will tell you what device was created. + + +o Configure the device IP (change IP addresses and device name to + suit) + + + + + host# ifconfig tap0 192.168.0.254 up + + + + + + +o Set up routing and arping if desired - this is my recipe, there are + other ways of doing the same thing + + + host# + bash -c 'echo 1 > /proc/sys/net/ipv4/ip_forward' + + host# + route add -host 192.168.0.253 dev tap0 + + + + + + + host# + bash -c 'echo 1 > /proc/sys/net/ipv4/conf/tap0/proxy_arp' + + + + + + + host# + arp -Ds 192.168.0.253 eth0 pub + + + + + Note that this must be done every time the host boots - this configu- + ration is not stored across host reboots. So, it's probably a good + idea to stick it in an rc file. An even better idea would be a little + utility which reads the information from a config file and sets up + devices at boot time. + + +o Rather than using up two IPs and ARPing for one of them, you can + also provide direct access to your LAN by the UML by using a + bridge. + + + host# + brctl addbr br0 + + + + + + + host# + ifconfig eth0 0.0.0.0 promisc up + + + + + + + host# + ifconfig tap0 0.0.0.0 promisc up + + + + + + + host# + ifconfig br0 192.168.0.1 netmask 255.255.255.0 up + + + + + + + + host# + brctl stp br0 off + + + + + + + host# + brctl setfd br0 1 + + + + + + + host# + brctl sethello br0 1 + + + + + + + host# + brctl addif br0 eth0 + + + + + + + host# + brctl addif br0 tap0 + + + + + Note that 'br0' should be setup using ifconfig with the existing IP + address of eth0, as eth0 no longer has its own IP. + + +o + + + Also, the /dev/net/tun device must be writable by the user running + UML in order for the UML to use the device that's been configured + for it. The simplest thing to do is + + + host# chmod 666 /dev/net/tun + + + + + Making it world-writable looks bad, but it seems not to be + exploitable as a security hole. However, it does allow anyone to cre- + ate useless tap devices (useless because they can't configure them), + which is a DOS attack. A somewhat more secure alternative would to be + to create a group containing all the users who have preconfigured tap + devices and chgrp /dev/net/tun to that group with mode 664 or 660. + + + +o Once the device is set up, run UML with 'eth0=tuntap,device name' + (i.e. 'eth0=tuntap,tap0') on the command line (or do it with the + mconsole config command). + + +o Bring the eth device up in UML and you're in business. + + If you don't want that tap device any more, you can make it non- + persistent with + + + host# tunctl -d tap device + + + + + Finally, tunctl has a -b (for brief mode) switch which causes it to + output only the name of the tap device it created. This makes it + suitable for capture by a script: + + + host# TAP=`tunctl -u 1000 -b` + + + + + + + 66..88.. EEtthheerrttaapp + + Ethertap is the general mechanism on 2.2 for userspace processes to + exchange packets with the kernel. + + + + To use this transport, you need to describe the virtual network device + on the UML command line. The general format for this is + + + eth =ethertap, , , + + + + + So, the previous example + + + eth0=ethertap,tap0,fe:fd:0:0:0:1,192.168.0.254 + + + + + attaches the UML eth0 device to the host /dev/tap0, assigns it the + ethernet address fe:fd:0:0:0:1, and assigns the IP address + 192.168.0.254 to the tap device. + + + + The tap device is mandatory, but the others are optional. If the + ethernet address is omitted, one will be assigned to it. + + + The presence of the tap IP address will cause the helper to run and do + whatever host setup is needed to allow the virtual machine to + communicate with the outside world. If you're not sure you know what + you're doing, this is the way to go. + + + If it is absent, then you must configure the tap device and whatever + arping and routing you will need on the host. However, even in this + case, the uml_net helper still needs to be in your path and it must be + setuid root if you're not running UML as root. This is because the + tap device doesn't support SIGIO, which UML needs in order to use + something as a source of input. So, the helper is used as a + convenient asynchronous IO thread. + + If you're using the uml_net helper, you can ignore the following host + setup - uml_net will do it for you. You just need to make sure you + have ethertap available, either built in to the host kernel or + available as a module. + + + If you want to set things up yourself, you need to make sure that the + appropriate /dev entry exists. If it doesn't, become root and create + it as follows: + + + mknod /dev/tap c 36 + 16 + + + + + For example, this is how to create /dev/tap0: + + + mknod /dev/tap0 c 36 0 + 16 + + + + + You also need to make sure that the host kernel has ethertap support. + If ethertap is enabled as a module, you apparently need to insmod + ethertap once for each ethertap device you want to enable. So, + + + host# + insmod ethertap + + + + + will give you the tap0 interface. To get the tap1 interface, you need + to run + + + host# + insmod ethertap unit=1 -o ethertap1 + + + + + + + + 66..99.. TThhee sswwiittcchh ddaaeemmoonn + + NNoottee: This is the daemon formerly known as uml_router, but which was + renamed so the network weenies of the world would stop growling at me. + + + The switch daemon, uml_switch, provides a mechanism for creating a + totally virtual network. By default, it provides no connection to the + host network (but see -tap, below). + + + The first thing you need to do is run the daemon. Running it with no + arguments will make it listen on a default pair of unix domain + sockets. + + + If you want it to listen on a different pair of sockets, use + + + -unix control socket data socket + + + + + + If you want it to act as a hub rather than a switch, use + + + -hub + + + + + + If you want the switch to be connected to host networking (allowing + the umls to get access to the outside world through the host), use + + + -tap tap0 + + + + + + Note that the tap device must be preconfigured (see "TUN/TAP with a + preconfigured tap device", above). If you're using a different tap + device than tap0, specify that instead of tap0. + + + uml_switch can be backgrounded as follows + + + host% + uml_switch [ options ] < /dev/null > /dev/null + + + + + The reason it doesn't background by default is that it listens to + stdin for EOF. When it sees that, it exits. + + + The general format of the kernel command line switch is + + + + ethn=daemon,ethernet address,socket + type,control socket,data socket + + + + + You can leave off everything except the 'daemon'. You only need to + specify the ethernet address if the one that will be assigned to it + isn't acceptable for some reason. The rest of the arguments describe + how to communicate with the daemon. You should only specify them if + you told the daemon to use different sockets than the default. So, if + you ran the daemon with no arguments, running the UML on the same + machine with + eth0=daemon + + + + + will cause the eth0 driver to attach itself to the daemon correctly. + + + + 66..1100.. SSlliipp + + Slip is another, less general, mechanism for a process to communicate + with the host networking. In contrast to the ethertap interface, + which exchanges ethernet frames with the host and can be used to + transport any higher-level protocol, it can only be used to transport + IP. + + + The general format of the command line switch is + + + + ethn=slip,slip IP + + + + + The slip IP argument is the IP address that will be assigned to the + host end of the slip device. If it is specified, the helper will run + and will set up the host so that the virtual machine can reach it and + the rest of the network. + + + There are some oddities with this interface that you should be aware + of. You should only specify one slip device on a given virtual + machine, and its name inside UML will be 'umn', not 'eth0' or whatever + you specified on the command line. These problems will be fixed at + some point. + + + + 66..1111.. SSlliirrpp + + slirp uses an external program, usually /usr/bin/slirp, to provide IP + only networking connectivity through the host. This is similar to IP + masquerading with a firewall, although the translation is performed in + user-space, rather than by the kernel. As slirp does not set up any + interfaces on the host, or changes routing, slirp does not require + root access or setuid binaries on the host. + + + The general format of the command line switch for slirp is: + + + + ethn=slirp,ethernet address,slirp path + + + + + The ethernet address is optional, as UML will set up the interface + with an ethernet address based upon the initial IP address of the + interface. The slirp path is generally /usr/bin/slirp, although it + will depend on distribution. + + + The slirp program can have a number of options passed to the command + line and we can't add them to the UML command line, as they will be + parsed incorrectly. Instead, a wrapper shell script can be written or + the options inserted into the /.slirprc file. More information on + all of the slirp options can be found in its man pages. + + + The eth0 interface on UML should be set up with the IP 10.2.0.15, + although you can use anything as long as it is not used by a network + you will be connecting to. The default route on UML should be set to + use + + + UML# + route add default dev eth0 + + + + + slirp provides a number of useful IP addresses which can be used by + UML, such as 10.0.2.3 which is an alias for the DNS server specified + in /etc/resolv.conf on the host or the IP given in the 'dns' option + for slirp. + + + Even with a baudrate setting higher than 115200, the slirp connection + is limited to 115200. If you need it to go faster, the slirp binary + needs to be compiled with FULL_BOLT defined in config.h. + + + + 66..1122.. ppccaapp + + The pcap transport is attached to a UML ethernet device on the command + line or with uml_mconsole with the following syntax: + + + + ethn=pcap,host interface,filter + expression,option1,option2 + + + + + The expression and options are optional. + + + The interface is whatever network device on the host you want to + sniff. The expression is a pcap filter expression, which is also what + tcpdump uses, so if you know how to specify tcpdump filters, you will + use the same expressions here. The options are up to two of + 'promisc', control whether pcap puts the host interface into + promiscuous mode. 'optimize' and 'nooptimize' control whether the pcap + expression optimizer is used. + + + Example: + + + + eth0=pcap,eth0,tcp + + eth1=pcap,eth0,!tcp + + + + will cause the UML eth0 to emit all tcp packets on the host eth0 and + the UML eth1 to emit all non-tcp packets on the host eth0. + + + + 66..1133.. SSeettttiinngg uupp tthhee hhoosstt yyoouurrsseellff + + If you don't specify an address for the host side of the ethertap or + slip device, UML won't do any setup on the host. So this is what is + needed to get things working (the examples use a host-side IP of + 192.168.0.251 and a UML-side IP of 192.168.0.250 - adjust to suit your + own network): + + +o The device needs to be configured with its IP address. Tap devices + are also configured with an mtu of 1484. Slip devices are + configured with a point-to-point address pointing at the UML ip + address. + + + host# ifconfig tap0 arp mtu 1484 192.168.0.251 up + + + + + + + host# + ifconfig sl0 192.168.0.251 pointopoint 192.168.0.250 up + + + + + + +o If a tap device is being set up, a route is set to the UML IP. + + + UML# route add -host 192.168.0.250 gw 192.168.0.251 + + + + + + +o To allow other hosts on your network to see the virtual machine, + proxy arp is set up for it. + + + host# arp -Ds 192.168.0.250 eth0 pub + + + + + + +o Finally, the host is set up to route packets. + + + host# echo 1 > /proc/sys/net/ipv4/ip_forward + + + + + + + + + + + 77.. SShhaarriinngg FFiilleessyysstteemmss bbeettwweeeenn VViirrttuuaall MMaacchhiinneess + + + + + 77..11.. AA wwaarrnniinngg + + Don't attempt to share filesystems simply by booting two UMLs from the + same file. That's the same thing as booting two physical machines + from a shared disk. It will result in filesystem corruption. + + + + 77..22.. UUssiinngg llaayyeerreedd bblloocckk ddeevviicceess + + The way to share a filesystem between two virtual machines is to use + the copy-on-write (COW) layering capability of the ubd block driver. + As of 2.4.6-2um, the driver supports layering a read-write private + device over a read-only shared device. A machine's writes are stored + in the private device, while reads come from either device - the + private one if the requested block is valid in it, the shared one if + not. Using this scheme, the majority of data which is unchanged is + shared between an arbitrary number of virtual machines, each of which + has a much smaller file containing the changes that it has made. With + a large number of UMLs booting from a large root filesystem, this + leads to a huge disk space saving. It will also help performance, + since the host will be able to cache the shared data using a much + smaller amount of memory, so UML disk requests will be served from the + host's memory rather than its disks. + + + + + To add a copy-on-write layer to an existing block device file, simply + add the name of the COW file to the appropriate ubd switch: + + + ubd0=root_fs_cow,root_fs_debian_22 + + + + + where 'root_fs_cow' is the private COW file and 'root_fs_debian_22' is + the existing shared filesystem. The COW file need not exist. If it + doesn't, the driver will create and initialize it. Once the COW file + has been initialized, it can be used on its own on the command line: + + + ubd0=root_fs_cow + + + + + The name of the backing file is stored in the COW file header, so it + would be redundant to continue specifying it on the command line. + + + + 77..33.. NNoottee!! + + When checking the size of the COW file in order to see the gobs of + space that you're saving, make sure you use 'ls -ls' to see the actual + disk consumption rather than the length of the file. The COW file is + sparse, so the length will be very different from the disk usage. + Here is a 'ls -l' of a COW file and backing file from one boot and + shutdown: + host% ls -l cow.debian debian2.2 + -rw-r--r-- 1 jdike jdike 492504064 Aug 6 21:16 cow.debian + -rwxrw-rw- 1 jdike jdike 537919488 Aug 6 20:42 debian2.2 + + + + + Doesn't look like much saved space, does it? Well, here's 'ls -ls': + + + host% ls -ls cow.debian debian2.2 + 880 -rw-r--r-- 1 jdike jdike 492504064 Aug 6 21:16 cow.debian + 525832 -rwxrw-rw- 1 jdike jdike 537919488 Aug 6 20:42 debian2.2 + + + + + Now, you can see that the COW file has less than a meg of disk, rather + than 492 meg. + + + + 77..44.. AAnnootthheerr wwaarrnniinngg + + Once a filesystem is being used as a readonly backing file for a COW + file, do not boot directly from it or modify it in any way. Doing so + will invalidate any COW files that are using it. The mtime and size + of the backing file are stored in the COW file header at its creation, + and they must continue to match. If they don't, the driver will + refuse to use the COW file. + + + + + If you attempt to evade this restriction by changing either the + backing file or the COW header by hand, you will get a corrupted + filesystem. + + + + + Among other things, this means that upgrading the distribution in a + backing file and expecting that all of the COW files using it will see + the upgrade will not work. + + + + + 77..55.. uummll__mmoooo :: MMeerrggiinngg aa CCOOWW ffiillee wwiitthh iittss bbaacckkiinngg ffiillee + + Depending on how you use UML and COW devices, it may be advisable to + merge the changes in the COW file into the backing file every once in + a while. + + + + + The utility that does this is uml_moo. Its usage is + + + host% uml_moo COW file new backing file + + + + + There's no need to specify the backing file since that information is + already in the COW file header. If you're paranoid, boot the new + merged file, and if you're happy with it, move it over the old backing + file. + + + + + uml_moo creates a new backing file by default as a safety measure. It + also has a destructive merge option which will merge the COW file + directly into its current backing file. This is really only usable + when the backing file only has one COW file associated with it. If + there are multiple COWs associated with a backing file, a -d merge of + one of them will invalidate all of the others. However, it is + convenient if you're short of disk space, and it should also be + noticeably faster than a non-destructive merge. + + + + + uml_moo is installed with the UML deb and RPM. If you didn't install + UML from one of those packages, you can also get it from the UML + utilities tar file in tools/moo. + + + + + + + + + 88.. CCrreeaattiinngg ffiilleessyysstteemmss + + + You may want to create and mount new UML filesystems, either because + your root filesystem isn't large enough or because you want to use a + filesystem other than ext2. + + + This was written on the occasion of reiserfs being included in the + 2.4.1 kernel pool, and therefore the 2.4.1 UML, so the examples will + talk about reiserfs. This information is generic, and the examples + should be easy to translate to the filesystem of your choice. + + + 88..11.. CCrreeaattee tthhee ffiilleessyysstteemm ffiillee + + dd is your friend. All you need to do is tell dd to create an empty + file of the appropriate size. I usually make it sparse to save time + and to avoid allocating disk space until it's actually used. For + example, the following command will create a sparse 100 meg file full + of zeroes. + + + host% + dd if=/dev/zero of=new_filesystem seek=100 count=1 bs=1M + + + + + + + 88..22.. AAssssiiggnn tthhee ffiillee ttoo aa UUMMLL ddeevviiccee + + Add an argument like the following to the UML command line: + + ubd4=new_filesystem + + + + + making sure that you use an unassigned ubd device number. + + + + 88..33.. CCrreeaattiinngg aanndd mmoouunnttiinngg tthhee ffiilleessyysstteemm + + Make sure that the filesystem is available, either by being built into + the kernel, or available as a module, then boot up UML and log in. If + the root filesystem doesn't have the filesystem utilities (mkfs, fsck, + etc), then get them into UML by way of the net or hostfs. + + + Make the new filesystem on the device assigned to the new file: + + + host# mkreiserfs /dev/ubd/4 + + + <----------- MKREISERFSv2 -----------> + + ReiserFS version 3.6.25 + Block size 4096 bytes + Block count 25856 + Used blocks 8212 + Journal - 8192 blocks (18-8209), journal header is in block 8210 + Bitmaps: 17 + Root block 8211 + Hash function "r5" + ATTENTION: ALL DATA WILL BE LOST ON '/dev/ubd/4'! (y/n)y + journal size 8192 (from 18) + Initializing journal - 0%....20%....40%....60%....80%....100% + Syncing..done. + + + + + Now, mount it: + + + UML# + mount /dev/ubd/4 /mnt + + + + + and you're in business. + + + + + + + + + + 99.. HHoosstt ffiillee aacccceessss + + + If you want to access files on the host machine from inside UML, you + can treat it as a separate machine and either nfs mount directories + from the host or copy files into the virtual machine with scp or rcp. + However, since UML is running on the host, it can access those + files just like any other process and make them available inside the + virtual machine without needing to use the network. + + + This is now possible with the hostfs virtual filesystem. With it, you + can mount a host directory into the UML filesystem and access the + files contained in it just as you would on the host. + + + 99..11.. UUssiinngg hhoossttffss + + To begin with, make sure that hostfs is available inside the virtual + machine with + + + UML# cat /proc/filesystems + + + + . hostfs should be listed. If it's not, either rebuild the kernel + with hostfs configured into it or make sure that hostfs is built as a + module and available inside the virtual machine, and insmod it. + + + Now all you need to do is run mount: + + + UML# mount none /mnt/host -t hostfs + + + + + will mount the host's / on the virtual machine's /mnt/host. + + + If you don't want to mount the host root directory, then you can + specify a subdirectory to mount with the -o switch to mount: + + + UML# mount none /mnt/home -t hostfs -o /home + + + + + will mount the hosts's /home on the virtual machine's /mnt/home. + + + + 99..22.. hhoossttffss aass tthhee rroooott ffiilleessyysstteemm + + It's possible to boot from a directory hierarchy on the host using + hostfs rather than using the standard filesystem in a file. + + To start, you need that hierarchy. The easiest way is to loop mount + an existing root_fs file: + + + host# mount root_fs uml_root_dir -o loop + + + + + You need to change the filesystem type of / in etc/fstab to be + 'hostfs', so that line looks like this: + + /dev/ubd/0 / hostfs defaults 1 1 + + + + + Then you need to chown to yourself all the files in that directory + that are owned by root. This worked for me: + + + host# find . -uid 0 -exec chown jdike {} \; + + + + + Next, make sure that your UML kernel has hostfs compiled in, not as a + module. Then run UML with the boot device pointing at that directory: + + + ubd0=/path/to/uml/root/directory + + + + + UML should then boot as it does normally. + + + 99..33.. BBuuiillddiinngg hhoossttffss + + If you need to build hostfs because it's not in your kernel, you have + two choices: + + + + +o Compiling hostfs into the kernel: + + + Reconfigure the kernel and set the 'Host filesystem' option under + + + +o Compiling hostfs as a module: + + + Reconfigure the kernel and set the 'Host filesystem' option under + be in arch/um/fs/hostfs/hostfs.o. Install that in + /lib/modules/`uname -r`/fs in the virtual machine, boot it up, and + + + UML# insmod hostfs + + + + + + + + + + + + + 1100.. TThhee MMaannaaggeemmeenntt CCoonnssoollee + + + + The UML management console is a low-level interface to the kernel, + somewhat like the i386 SysRq interface. Since there is a full-blown + operating system under UML, there is much greater flexibility possible + than with the SysRq mechanism. + + + There are a number of things you can do with the mconsole interface: + + +o get the kernel version + + +o add and remove devices + + +o halt or reboot the machine + + +o Send SysRq commands + + +o Pause and resume the UML + + + You need the mconsole client (uml_mconsole) which is present in CVS + (/tools/mconsole) in 2.4.5-9um and later, and will be in the RPM in + 2.4.6. + + + You also need CONFIG_MCONSOLE (under 'General Setup') enabled in UML. + When you boot UML, you'll see a line like: + + + mconsole initialized on /home/jdike/.uml/umlNJ32yL/mconsole + + + + + If you specify a unique machine id one the UML command line, i.e. + + + umid=debian + + + + + you'll see this + + + mconsole initialized on /home/jdike/.uml/debian/mconsole + + + + + That file is the socket that uml_mconsole will use to communicate with + UML. Run it with either the umid or the full path as its argument: + + + host% uml_mconsole debian + + + + + or + + + host% uml_mconsole /home/jdike/.uml/debian/mconsole + + + + + You'll get a prompt, at which you can run one of these commands: + + +o version + + +o halt + + +o reboot + + +o config + + +o remove + + +o sysrq + + +o help + + +o cad + + +o stop + + +o go + + + 1100..11.. vveerrssiioonn + + This takes no arguments. It prints the UML version. + + + (mconsole) version + OK Linux usermode 2.4.5-9um #1 Wed Jun 20 22:47:08 EDT 2001 i686 + + + + + There are a couple actual uses for this. It's a simple no-op which + can be used to check that a UML is running. It's also a way of + sending an interrupt to the UML. This is sometimes useful on SMP + hosts, where there's a bug which causes signals to UML to be lost, + often causing it to appear to hang. Sending such a UML the mconsole + version command is a good way to 'wake it up' before networking has + been enabled, as it does not do anything to the function of the UML. + + + + 1100..22.. hhaalltt aanndd rreebboooott + + These take no arguments. They shut the machine down immediately, with + no syncing of disks and no clean shutdown of userspace. So, they are + pretty close to crashing the machine. + + + (mconsole) halt + OK + + + + + + + 1100..33.. ccoonnffiigg + + "config" adds a new device to the virtual machine. Currently the ubd + and network drivers support this. It takes one argument, which is the + device to add, with the same syntax as the kernel command line. + + + + + (mconsole) + config ubd3=/home/jdike/incoming/roots/root_fs_debian22 + + OK + (mconsole) config eth1=mcast + OK + + + + + + + 1100..44.. rreemmoovvee + + "remove" deletes a device from the system. Its argument is just the + name of the device to be removed. The device must be idle in whatever + sense the driver considers necessary. In the case of the ubd driver, + the removed block device must not be mounted, swapped on, or otherwise + open, and in the case of the network driver, the device must be down. + + + (mconsole) remove ubd3 + OK + (mconsole) remove eth1 + OK + + + + + + + 1100..55.. ssyyssrrqq + + This takes one argument, which is a single letter. It calls the + generic kernel's SysRq driver, which does whatever is called for by + that argument. See the SysRq documentation in Documentation/sysrq.txt + in your favorite kernel tree to see what letters are valid and what + they do. + + + + 1100..66.. hheellpp + + "help" returns a string listing the valid commands and what each one + does. + + + + 1100..77.. ccaadd + + This invokes the Ctl-Alt-Del action on init. What exactly this ends + up doing is up to /etc/inittab. Normally, it reboots the machine. + With UML, this is usually not desired, so if a halt would be better, + then find the section of inittab that looks like this + + + # What to do when CTRL-ALT-DEL is pressed. + ca:12345:ctrlaltdel:/sbin/shutdown -t1 -a -r now + + + + + and change the command to halt. + + + + 1100..88.. ssttoopp + + This puts the UML in a loop reading mconsole requests until a 'go' + mconsole command is received. This is very useful for making backups + of UML filesystems, as the UML can be stopped, then synced via 'sysrq + s', so that everything is written to the filesystem. You can then copy + the filesystem and then send the UML 'go' via mconsole. + + + Note that a UML running with more than one CPU will have problems + after you send the 'stop' command, as only one CPU will be held in a + mconsole loop and all others will continue as normal. This is a bug, + and will be fixed. + + + + 1100..99.. ggoo + + This resumes a UML after being paused by a 'stop' command. Note that + when the UML has resumed, TCP connections may have timed out and if + the UML is paused for a long period of time, crond might go a little + crazy, running all the jobs it didn't do earlier. + + + + + + + + + 1111.. KKeerrnneell ddeebbuuggggiinngg + + + NNoottee:: The interface that makes debugging, as described here, possible + is present in 2.4.0-test6 kernels and later. + + + Since the user-mode kernel runs as a normal Linux process, it is + possible to debug it with gdb almost like any other process. It is + slightly different because the kernel's threads are already being + ptraced for system call interception, so gdb can't ptrace them. + However, a mechanism has been added to work around that problem. + + + In order to debug the kernel, you need build it from source. See + ``Compiling the kernel and modules'' for information on doing that. + Make sure that you enable CONFIG_DEBUGSYM and CONFIG_PT_PROXY during + the config. These will compile the kernel with -g, and enable the + ptrace proxy so that gdb works with UML, respectively. + + + + + 1111..11.. SSttaarrttiinngg tthhee kkeerrnneell uunnddeerr ggddbb + + You can have the kernel running under the control of gdb from the + beginning by putting 'debug' on the command line. You will get an + xterm with gdb running inside it. The kernel will send some commands + to gdb which will leave it stopped at the beginning of start_kernel. + At this point, you can get things going with 'next', 'step', or + 'cont'. + + + There is a transcript of a debugging session here , with breakpoints being set in the scheduler and in an + interrupt handler. + 1111..22.. EExxaammiinniinngg sslleeeeppiinngg pprroocceesssseess + + Not every bug is evident in the currently running process. Sometimes, + processes hang in the kernel when they shouldn't because they've + deadlocked on a semaphore or something similar. In this case, when + you ^C gdb and get a backtrace, you will see the idle thread, which + isn't very relevant. + + + What you want is the stack of whatever process is sleeping when it + shouldn't be. You need to figure out which process that is, which is + generally fairly easy. Then you need to get its host process id, + which you can do either by looking at ps on the host or at + task.thread.extern_pid in gdb. + + + Now what you do is this: + + +o detach from the current thread + + + (UML gdb) det + + + + + + +o attach to the thread you are interested in + + + (UML gdb) att + + + + + + +o look at its stack and anything else of interest + + + (UML gdb) bt + + + + + Note that you can't do anything at this point that requires that a + process execute, e.g. calling a function + + +o when you're done looking at that process, reattach to the current + thread and continue it + + + (UML gdb) + att 1 + + + + + + + (UML gdb) + c + + + + + Here, specifying any pid which is not the process id of a UML thread + will cause gdb to reattach to the current thread. I commonly use 1, + but any other invalid pid would work. + + + + 1111..33.. RRuunnnniinngg dddddd oonn UUMMLL + + ddd works on UML, but requires a special kludge. The process goes + like this: + + +o Start ddd + + + host% ddd linux + + + + + + +o With ps, get the pid of the gdb that ddd started. You can ask the + gdb to tell you, but for some reason that confuses things and + causes a hang. + + +o run UML with 'debug=parent gdb-pid=' added to the command line + - it will just sit there after you hit return + + +o type 'att 1' to the ddd gdb and you will see something like + + + 0xa013dc51 in __kill () + + + (gdb) + + + + + + +o At this point, type 'c', UML will boot up, and you can use ddd just + as you do on any other process. + + + + 1111..44.. DDeebbuuggggiinngg mmoodduulleess + + gdb has support for debugging code which is dynamically loaded into + the process. This support is what is needed to debug kernel modules + under UML. + + + Using that support is somewhat complicated. You have to tell gdb what + object file you just loaded into UML and where in memory it is. Then, + it can read the symbol table, and figure out where all the symbols are + from the load address that you provided. It gets more interesting + when you load the module again (i.e. after an rmmod). You have to + tell gdb to forget about all its symbols, including the main UML ones + for some reason, then load then all back in again. + + + There's an easy way and a hard way to do this. The easy way is to use + the umlgdb expect script written by Chandan Kudige. It basically + automates the process for you. + + + First, you must tell it where your modules are. There is a list in + the script that looks like this: + set MODULE_PATHS { + "fat" "/usr/src/uml/linux-2.4.18/fs/fat/fat.o" + "isofs" "/usr/src/uml/linux-2.4.18/fs/isofs/isofs.o" + "minix" "/usr/src/uml/linux-2.4.18/fs/minix/minix.o" + } + + + + + You change that to list the names and paths of the modules that you + are going to debug. Then you run it from the toplevel directory of + your UML pool and it basically tells you what to do: + + + + + ******** GDB pid is 21903 ******** + Start UML as: ./linux debug gdb-pid=21903 + + + + GNU gdb 5.0rh-5 Red Hat Linux 7.1 + Copyright 2001 Free Software Foundation, Inc. + GDB is free software, covered by the GNU General Public License, and you are + welcome to change it and/or distribute copies of it under certain conditions. + Type "show copying" to see the conditions. + There is absolutely no warranty for GDB. Type "show warranty" for details. + This GDB was configured as "i386-redhat-linux"... + (gdb) b sys_init_module + Breakpoint 1 at 0xa0011923: file module.c, line 349. + (gdb) att 1 + + + + + After you run UML and it sits there doing nothing, you hit return at + the 'att 1' and continue it: + + + Attaching to program: /home/jdike/linux/2.4/um/./linux, process 1 + 0xa00f4221 in __kill () + (UML gdb) c + Continuing. + + + + + At this point, you debug normally. When you insmod something, the + expect magic will kick in and you'll see something like: + + + + + + + + + + + + + + + + + + *** Module hostfs loaded *** + Breakpoint 1, sys_init_module (name_user=0x805abb0 "hostfs", + mod_user=0x8070e00) at module.c:349 + 349 char *name, *n_name, *name_tmp = NULL; + (UML gdb) finish + Run till exit from #0 sys_init_module (name_user=0x805abb0 "hostfs", + mod_user=0x8070e00) at module.c:349 + 0xa00e2e23 in execute_syscall (r=0xa8140284) at syscall_kern.c:411 + 411 else res = EXECUTE_SYSCALL(syscall, regs); + Value returned is $1 = 0 + (UML gdb) + p/x (int)module_list + module_list->size_of_struct + + $2 = 0xa9021054 + (UML gdb) symbol-file ./linux + Load new symbol table from "./linux"? (y or n) y + Reading symbols from ./linux... + done. + (UML gdb) + add-symbol-file /home/jdike/linux/2.4/um/arch/um/fs/hostfs/hostfs.o 0xa9021054 + + add symbol table from file "/home/jdike/linux/2.4/um/arch/um/fs/hostfs/hostfs.o" at + .text_addr = 0xa9021054 + (y or n) y + + Reading symbols from /home/jdike/linux/2.4/um/arch/um/fs/hostfs/hostfs.o... + done. + (UML gdb) p *module_list + $1 = {size_of_struct = 84, next = 0xa0178720, name = 0xa9022de0 "hostfs", + size = 9016, uc = {usecount = {counter = 0}, pad = 0}, flags = 1, + nsyms = 57, ndeps = 0, syms = 0xa9023170, deps = 0x0, refs = 0x0, + init = 0xa90221f0 , cleanup = 0xa902222c , + ex_table_start = 0x0, ex_table_end = 0x0, persist_start = 0x0, + persist_end = 0x0, can_unload = 0, runsize = 0, kallsyms_start = 0x0, + kallsyms_end = 0x0, + archdata_start = 0x1b855
, + archdata_end = 0xe5890000
, + kernel_data = 0xf689c35d
} + >> Finished loading symbols for hostfs ... + + + + + That's the easy way. It's highly recommended. The hard way is + described below in case you're interested in what's going on. + + + Boot the kernel under the debugger and load the module with insmod or + modprobe. With gdb, do: + + + (UML gdb) p module_list + + + + + This is a list of modules that have been loaded into the kernel, with + the most recently loaded module first. Normally, the module you want + is at module_list. If it's not, walk down the next links, looking at + the name fields until find the module you want to debug. Take the + address of that structure, and add module.size_of_struct (which in + 2.4.10 kernels is 96 (0x60)) to it. Gdb can make this hard addition + for you :-): + + + + (UML gdb) + printf "%#x\n", (int)module_list module_list->size_of_struct + + + + + The offset from the module start occasionally changes (before 2.4.0, + it was module.size_of_struct + 4), so it's a good idea to check the + init and cleanup addresses once in a while, as describe below. Now + do: + + + (UML gdb) + add-symbol-file /path/to/module/on/host that_address + + + + + Tell gdb you really want to do it, and you're in business. + + + If there's any doubt that you got the offset right, like breakpoints + appear not to work, or they're appearing in the wrong place, you can + check it by looking at the module structure. The init and cleanup + fields should look like: + + + init = 0x588066b0 , cleanup = 0x588066c0 + + + + + with no offsets on the symbol names. If the names are right, but they + are offset, then the offset tells you how much you need to add to the + address you gave to add-symbol-file. + + + When you want to load in a new version of the module, you need to get + gdb to forget about the old one. The only way I've found to do that + is to tell gdb to forget about all symbols that it knows about: + + + (UML gdb) symbol-file + + + + + Then reload the symbols from the kernel binary: + + + (UML gdb) symbol-file /path/to/kernel + + + + + and repeat the process above. You'll also need to re-enable break- + points. They were disabled when you dumped all the symbols because + gdb couldn't figure out where they should go. + + + + 1111..55.. AAttttaacchhiinngg ggddbb ttoo tthhee kkeerrnneell + + If you don't have the kernel running under gdb, you can attach gdb to + it later by sending the tracing thread a SIGUSR1. The first line of + the console output identifies its pid: + tracing thread pid = 20093 + + + + + When you send it the signal: + + + host% kill -USR1 20093 + + + + + you will get an xterm with gdb running in it. + + + If you have the mconsole compiled into UML, then the mconsole client + can be used to start gdb: + + + (mconsole) (mconsole) config gdb=xterm + + + + + will fire up an xterm with gdb running in it. + + + + 1111..66.. UUssiinngg aalltteerrnnaattee ddeebbuuggggeerrss + + UML has support for attaching to an already running debugger rather + than starting gdb itself. This is present in CVS as of 17 Apr 2001. + I sent it to Alan for inclusion in the ac tree, and it will be in my + 2.4.4 release. + + + This is useful when gdb is a subprocess of some UI, such as emacs or + ddd. It can also be used to run debuggers other than gdb on UML. + Below is an example of using strace as an alternate debugger. + + + To do this, you need to get the pid of the debugger and pass it in + with the + + + If you are using gdb under some UI, then tell it to 'att 1', and + you'll find yourself attached to UML. + + + If you are using something other than gdb as your debugger, then + you'll need to get it to do the equivalent of 'att 1' if it doesn't do + it automatically. + + + An example of an alternate debugger is strace. You can strace the + actual kernel as follows: + + +o Run the following in a shell + + + host% + sh -c 'echo pid=$$; echo -n hit return; read x; exec strace -p 1 -o strace.out' + + + + +o Run UML with 'debug' and 'gdb-pid=' with the pid printed out + by the previous command + + +o Hit return in the shell, and UML will start running, and strace + output will start accumulating in the output file. + + Note that this is different from running + + + host% strace ./linux + + + + + That will strace only the main UML thread, the tracing thread, which + doesn't do any of the actual kernel work. It just oversees the vir- + tual machine. In contrast, using strace as described above will show + you the low-level activity of the virtual machine. + + + + + + 1122.. KKeerrnneell ddeebbuuggggiinngg eexxaammpplleess + + 1122..11.. TThhee ccaassee ooff tthhee hhuunngg ffsscckk + + When booting up the kernel, fsck failed, and dropped me into a shell + to fix things up. I ran fsck -y, which hung: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Setting hostname uml [ OK ] + Checking root filesystem + /dev/fhd0 was not cleanly unmounted, check forced. + Error reading block 86894 (Attempt to read block from filesystem resulted in short read) while reading indirect blocks of inode 19780. + + /dev/fhd0: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY. + (i.e., without -a or -p options) + [ FAILED ] + + *** An error occurred during the file system check. + *** Dropping you to a shell; the system will reboot + *** when you leave the shell. + Give root password for maintenance + (or type Control-D for normal startup): + + [root@uml /root]# fsck -y /dev/fhd0 + fsck -y /dev/fhd0 + Parallelizing fsck version 1.14 (9-Jan-1999) + e2fsck 1.14, 9-Jan-1999 for EXT2 FS 0.5b, 95/08/09 + /dev/fhd0 contains a file system with errors, check forced. + Pass 1: Checking inodes, blocks, and sizes + Error reading block 86894 (Attempt to read block from filesystem resulted in short read) while reading indirect blocks of inode 19780. Ignore error? yes + + Inode 19780, i_blocks is 1548, should be 540. Fix? yes + + Pass 2: Checking directory structure + Error reading block 49405 (Attempt to read block from filesystem resulted in short read). Ignore error? yes + + Directory inode 11858, block 0, offset 0: directory corrupted + Salvage? yes + + Missing '.' in directory inode 11858. + Fix? yes + + Missing '..' in directory inode 11858. + Fix? yes + + + + + + The standard drill in this sort of situation is to fire up gdb on the + signal thread, which, in this case, was pid 1935. In another window, + I run gdb and attach pid 1935. + + + + + ~/linux/2.3.26/um 1016: gdb linux + GNU gdb 4.17.0.11 with Linux support + Copyright 1998 Free Software Foundation, Inc. + GDB is free software, covered by the GNU General Public License, and you are + welcome to change it and/or distribute copies of it under certain conditions. + Type "show copying" to see the conditions. + There is absolutely no warranty for GDB. Type "show warranty" for details. + This GDB was configured as "i386-redhat-linux"... + + (gdb) att 1935 + Attaching to program `/home/dike/linux/2.3.26/um/linux', Pid 1935 + 0x100756d9 in __wait4 () + + + + + + + Let's see what's currently running: + + + + (gdb) p current_task.pid + $1 = 0 + + + + + + It's the idle thread, which means that fsck went to sleep for some + reason and never woke up. + + + Let's guess that the last process in the process list is fsck: + + + + (gdb) p current_task.prev_task.comm + $13 = "fsck.ext2\000\000\000\000\000\000" + + + + + + It is, so let's see what it thinks it's up to: + + + + (gdb) p current_task.prev_task.thread + $14 = {extern_pid = 1980, tracing = 0, want_tracing = 0, forking = 0, + kernel_stack_page = 0, signal_stack = 1342627840, syscall = {id = 4, args = { + 3, 134973440, 1024, 0, 1024}, have_result = 0, result = 50590720}, + request = {op = 2, u = {exec = {ip = 1350467584, sp = 2952789424}, fork = { + regs = {1350467584, 2952789424, 0 }, sigstack = 0, + pid = 0}, switch_to = 0x507e8000, thread = {proc = 0x507e8000, + arg = 0xaffffdb0, flags = 0, new_pid = 0}, input_request = { + op = 1350467584, fd = -1342177872, proc = 0, pid = 0}}}} + + + + + + The interesting things here are the fact that its .thread.syscall.id + is __NR_write (see the big switch in arch/um/kernel/syscall_kern.c or + the defines in include/asm-um/arch/unistd.h), and that it never + returned. Also, its .request.op is OP_SWITCH (see + arch/um/include/user_util.h). These mean that it went into a write, + and, for some reason, called schedule(). + + + The fact that it never returned from write means that its stack should + be fairly interesting. Its pid is 1980 (.thread.extern_pid). That + process is being ptraced by the signal thread, so it must be detached + before gdb can attach it: + + + + + + + + + + + (gdb) call detach(1980) + + Program received signal SIGSEGV, Segmentation fault. + + The program being debugged stopped while in a function called from GDB. + When the function (detach) is done executing, GDB will silently + stop (instead of continuing to evaluate the expression containing + the function call). + (gdb) call detach(1980) + $15 = 0 + + + + + + The first detach segfaults for some reason, and the second one + succeeds. + + + Now I detach from the signal thread, attach to the fsck thread, and + look at its stack: + + + (gdb) det + Detaching from program: /home/dike/linux/2.3.26/um/linux Pid 1935 + (gdb) att 1980 + Attaching to program `/home/dike/linux/2.3.26/um/linux', Pid 1980 + 0x10070451 in __kill () + (gdb) bt + #0 0x10070451 in __kill () + #1 0x10068ccd in usr1_pid (pid=1980) at process.c:30 + #2 0x1006a03f in _switch_to (prev=0x50072000, next=0x507e8000) + at process_kern.c:156 + #3 0x1006a052 in switch_to (prev=0x50072000, next=0x507e8000, last=0x50072000) + at process_kern.c:161 + #4 0x10001d12 in schedule () at sched.c:777 + #5 0x1006a744 in __down (sem=0x507d241c) at semaphore.c:71 + #6 0x1006aa10 in __down_failed () at semaphore.c:157 + #7 0x1006c5d8 in segv_handler (sc=0x5006e940) at trap_user.c:174 + #8 0x1006c5ec in kern_segv_handler (sig=11) at trap_user.c:182 + #9 + #10 0x10155404 in errno () + #11 0x1006c0aa in segv (address=1342179328, is_write=2) at trap_kern.c:50 + #12 0x1006c5d8 in segv_handler (sc=0x5006eaf8) at trap_user.c:174 + #13 0x1006c5ec in kern_segv_handler (sig=11) at trap_user.c:182 + #14 + #15 0xc0fd in ?? () + #16 0x10016647 in sys_write (fd=3, + buf=0x80b8800
, count=1024) + at read_write.c:159 + #17 0x1006d5b3 in execute_syscall (syscall=4, args=0x5006ef08) + at syscall_kern.c:254 + #18 0x1006af87 in really_do_syscall (sig=12) at syscall_user.c:35 + #19 + #20 0x400dc8b0 in ?? () + + + + + + The interesting things here are : + + +o There are two segfaults on this stack (frames 9 and 14) + + +o The first faulting address (frame 11) is 0x50000800 + + (gdb) p (void *)1342179328 + $16 = (void *) 0x50000800 + + + + + + The initial faulting address is interesting because it is on the idle + thread's stack. I had been seeing the idle thread segfault for no + apparent reason, and the cause looked like stack corruption. In hopes + of catching the culprit in the act, I had turned off all protections + to that stack while the idle thread wasn't running. This apparently + tripped that trap. + + + However, the more immediate problem is that second segfault and I'm + going to concentrate on that. First, I want to see where the fault + happened, so I have to go look at the sigcontent struct in frame 8: + + + + (gdb) up + #1 0x10068ccd in usr1_pid (pid=1980) at process.c:30 + 30 kill(pid, SIGUSR1); + (gdb) + #2 0x1006a03f in _switch_to (prev=0x50072000, next=0x507e8000) + at process_kern.c:156 + 156 usr1_pid(getpid()); + (gdb) + #3 0x1006a052 in switch_to (prev=0x50072000, next=0x507e8000, last=0x50072000) + at process_kern.c:161 + 161 _switch_to(prev, next); + (gdb) + #4 0x10001d12 in schedule () at sched.c:777 + 777 switch_to(prev, next, prev); + (gdb) + #5 0x1006a744 in __down (sem=0x507d241c) at semaphore.c:71 + 71 schedule(); + (gdb) + #6 0x1006aa10 in __down_failed () at semaphore.c:157 + 157 } + (gdb) + #7 0x1006c5d8 in segv_handler (sc=0x5006e940) at trap_user.c:174 + 174 segv(sc->cr2, sc->err & 2); + (gdb) + #8 0x1006c5ec in kern_segv_handler (sig=11) at trap_user.c:182 + 182 segv_handler(sc); + (gdb) p *sc + Cannot access memory at address 0x0. + + + + + That's not very useful, so I'll try a more manual method: + + + (gdb) p *((struct sigcontext *) (&sig + 1)) + $19 = {gs = 0, __gsh = 0, fs = 0, __fsh = 0, es = 43, __esh = 0, ds = 43, + __dsh = 0, edi = 1342179328, esi = 1350378548, ebp = 1342630440, + esp = 1342630420, ebx = 1348150624, edx = 1280, ecx = 0, eax = 0, + trapno = 14, err = 4, eip = 268480945, cs = 35, __csh = 0, eflags = 66118, + esp_at_signal = 1342630420, ss = 43, __ssh = 0, fpstate = 0x0, oldmask = 0, + cr2 = 1280} + + + + The ip is in handle_mm_fault: + + + (gdb) p (void *)268480945 + $20 = (void *) 0x1000b1b1 + (gdb) i sym $20 + handle_mm_fault + 57 in section .text + + + + + + Specifically, it's in pte_alloc: + + + (gdb) i line *$20 + Line 124 of "/home/dike/linux/2.3.26/um/include/asm/pgalloc.h" + starts at address 0x1000b1b1 + and ends at 0x1000b1b7 . + + + + + + To find where in handle_mm_fault this is, I'll jump forward in the + code until I see an address in that procedure: + + + + (gdb) i line *0x1000b1c0 + Line 126 of "/home/dike/linux/2.3.26/um/include/asm/pgalloc.h" + starts at address 0x1000b1b7 + and ends at 0x1000b1c3 . + (gdb) i line *0x1000b1d0 + Line 131 of "/home/dike/linux/2.3.26/um/include/asm/pgalloc.h" + starts at address 0x1000b1d0 + and ends at 0x1000b1da . + (gdb) i line *0x1000b1e0 + Line 61 of "/home/dike/linux/2.3.26/um/include/asm/pgalloc.h" + starts at address 0x1000b1da + and ends at 0x1000b1e1 . + (gdb) i line *0x1000b1f0 + Line 134 of "/home/dike/linux/2.3.26/um/include/asm/pgalloc.h" + starts at address 0x1000b1f0 + and ends at 0x1000b200 . + (gdb) i line *0x1000b200 + Line 135 of "/home/dike/linux/2.3.26/um/include/asm/pgalloc.h" + starts at address 0x1000b200 + and ends at 0x1000b208 . + (gdb) i line *0x1000b210 + Line 139 of "/home/dike/linux/2.3.26/um/include/asm/pgalloc.h" + starts at address 0x1000b210 + and ends at 0x1000b219 . + (gdb) i line *0x1000b220 + Line 1168 of "memory.c" starts at address 0x1000b21e + and ends at 0x1000b222 . + + + + + + Something is apparently wrong with the page tables or vma_structs, so + lets go back to frame 11 and have a look at them: + + + + #11 0x1006c0aa in segv (address=1342179328, is_write=2) at trap_kern.c:50 + 50 handle_mm_fault(current, vma, address, is_write); + (gdb) call pgd_offset_proc(vma->vm_mm, address) + $22 = (pgd_t *) 0x80a548c + + + + + + That's pretty bogus. Page tables aren't supposed to be in process + text or data areas. Let's see what's in the vma: + + + (gdb) p *vma + $23 = {vm_mm = 0x507d2434, vm_start = 0, vm_end = 134512640, + vm_next = 0x80a4f8c, vm_page_prot = {pgprot = 0}, vm_flags = 31200, + vm_avl_height = 2058, vm_avl_left = 0x80a8c94, vm_avl_right = 0x80d1000, + vm_next_share = 0xaffffdb0, vm_pprev_share = 0xaffffe63, + vm_ops = 0xaffffe7a, vm_pgoff = 2952789626, vm_file = 0xafffffec, + vm_private_data = 0x62} + (gdb) p *vma.vm_mm + $24 = {mmap = 0x507d2434, mmap_avl = 0x0, mmap_cache = 0x8048000, + pgd = 0x80a4f8c, mm_users = {counter = 0}, mm_count = {counter = 134904288}, + map_count = 134909076, mmap_sem = {count = {counter = 135073792}, + sleepers = -1342177872, wait = {lock = , + task_list = {next = 0xaffffe63, prev = 0xaffffe7a}, + __magic = -1342177670, __creator = -1342177300}, __magic = 98}, + page_table_lock = {}, context = 138, start_code = 0, end_code = 0, + start_data = 0, end_data = 0, start_brk = 0, brk = 0, start_stack = 0, + arg_start = 0, arg_end = 0, env_start = 0, env_end = 0, rss = 1350381536, + total_vm = 0, locked_vm = 0, def_flags = 0, cpu_vm_mask = 0, swap_cnt = 0, + swap_address = 0, segments = 0x0} + + + + + + This also pretty bogus. With all of the 0x80xxxxx and 0xaffffxxx + addresses, this is looking like a stack was plonked down on top of + these structures. Maybe it's a stack overflow from the next page: + + + + (gdb) p vma + $25 = (struct vm_area_struct *) 0x507d2434 + + + + + + That's towards the lower quarter of the page, so that would have to + have been pretty heavy stack overflow: + + + + + + + + + + + + + + + (gdb) x/100x $25 + 0x507d2434: 0x507d2434 0x00000000 0x08048000 0x080a4f8c + 0x507d2444: 0x00000000 0x080a79e0 0x080a8c94 0x080d1000 + 0x507d2454: 0xaffffdb0 0xaffffe63 0xaffffe7a 0xaffffe7a + 0x507d2464: 0xafffffec 0x00000062 0x0000008a 0x00000000 + 0x507d2474: 0x00000000 0x00000000 0x00000000 0x00000000 + 0x507d2484: 0x00000000 0x00000000 0x00000000 0x00000000 + 0x507d2494: 0x00000000 0x00000000 0x507d2fe0 0x00000000 + 0x507d24a4: 0x00000000 0x00000000 0x00000000 0x00000000 + 0x507d24b4: 0x00000000 0x00000000 0x00000000 0x00000000 + 0x507d24c4: 0x00000000 0x00000000 0x00000000 0x00000000 + 0x507d24d4: 0x00000000 0x00000000 0x00000000 0x00000000 + 0x507d24e4: 0x00000000 0x00000000 0x00000000 0x00000000 + 0x507d24f4: 0x00000000 0x00000000 0x00000000 0x00000000 + 0x507d2504: 0x00000000 0x00000000 0x00000000 0x00000000 + 0x507d2514: 0x00000000 0x00000000 0x00000000 0x00000000 + 0x507d2524: 0x00000000 0x00000000 0x00000000 0x00000000 + 0x507d2534: 0x00000000 0x00000000 0x507d25dc 0x00000000 + 0x507d2544: 0x00000000 0x00000000 0x00000000 0x00000000 + 0x507d2554: 0x00000000 0x00000000 0x00000000 0x00000000 + 0x507d2564: 0x00000000 0x00000000 0x00000000 0x00000000 + 0x507d2574: 0x00000000 0x00000000 0x00000000 0x00000000 + 0x507d2584: 0x00000000 0x00000000 0x00000000 0x00000000 + 0x507d2594: 0x00000000 0x00000000 0x00000000 0x00000000 + 0x507d25a4: 0x00000000 0x00000000 0x00000000 0x00000000 + 0x507d25b4: 0x00000000 0x00000000 0x00000000 0x00000000 + + + + + + It's not stack overflow. The only "stack-like" piece of this data is + the vma_struct itself. + + + At this point, I don't see any avenues to pursue, so I just have to + admit that I have no idea what's going on. What I will do, though, is + stick a trap on the segfault handler which will stop if it sees any + writes to the idle thread's stack. That was the thing that happened + first, and it may be that if I can catch it immediately, what's going + on will be somewhat clearer. + + + 1122..22.. EEppiissooddee 22:: TThhee ccaassee ooff tthhee hhuunngg ffsscckk + + After setting a trap in the SEGV handler for accesses to the signal + thread's stack, I reran the kernel. + + + fsck hung again, this time by hitting the trap: + + + + + + + + + + + + + + + + + Setting hostname uml [ OK ] + Checking root filesystem + /dev/fhd0 contains a file system with errors, check forced. + Error reading block 86894 (Attempt to read block from filesystem resulted in short read) while reading indirect blocks of inode 19780. + + /dev/fhd0: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY. + (i.e., without -a or -p options) + [ FAILED ] + + *** An error occurred during the file system check. + *** Dropping you to a shell; the system will reboot + *** when you leave the shell. + Give root password for maintenance + (or type Control-D for normal startup): + + [root@uml /root]# fsck -y /dev/fhd0 + fsck -y /dev/fhd0 + Parallelizing fsck version 1.14 (9-Jan-1999) + e2fsck 1.14, 9-Jan-1999 for EXT2 FS 0.5b, 95/08/09 + /dev/fhd0 contains a file system with errors, check forced. + Pass 1: Checking inodes, blocks, and sizes + Error reading block 86894 (Attempt to read block from filesystem resulted in short read) while reading indirect blocks of inode 19780. Ignore error? yes + + Pass 2: Checking directory structure + Error reading block 49405 (Attempt to read block from filesystem resulted in short read). Ignore error? yes + + Directory inode 11858, block 0, offset 0: directory corrupted + Salvage? yes + + Missing '.' in directory inode 11858. + Fix? yes + + Missing '..' in directory inode 11858. + Fix? yes + + Untested (4127) [100fe44c]: trap_kern.c line 31 + + + + + + I need to get the signal thread to detach from pid 4127 so that I can + attach to it with gdb. This is done by sending it a SIGUSR1, which is + caught by the signal thread, which detaches the process: + + + kill -USR1 4127 + + + + + + Now I can run gdb on it: + + + + + + + + + + + + + + ~/linux/2.3.26/um 1034: gdb linux + GNU gdb 4.17.0.11 with Linux support + Copyright 1998 Free Software Foundation, Inc. + GDB is free software, covered by the GNU General Public License, and you are + welcome to change it and/or distribute copies of it under certain conditions. + Type "show copying" to see the conditions. + There is absolutely no warranty for GDB. Type "show warranty" for details. + This GDB was configured as "i386-redhat-linux"... + (gdb) att 4127 + Attaching to program `/home/dike/linux/2.3.26/um/linux', Pid 4127 + 0x10075891 in __libc_nanosleep () + + + + + + The backtrace shows that it was in a write and that the fault address + (address in frame 3) is 0x50000800, which is right in the middle of + the signal thread's stack page: + + + (gdb) bt + #0 0x10075891 in __libc_nanosleep () + #1 0x1007584d in __sleep (seconds=1000000) + at ../sysdeps/unix/sysv/linux/sleep.c:78 + #2 0x1006ce9a in stop () at user_util.c:191 + #3 0x1006bf88 in segv (address=1342179328, is_write=2) at trap_kern.c:31 + #4 0x1006c628 in segv_handler (sc=0x5006eaf8) at trap_user.c:174 + #5 0x1006c63c in kern_segv_handler (sig=11) at trap_user.c:182 + #6 + #7 0xc0fd in ?? () + #8 0x10016647 in sys_write (fd=3, buf=0x80b8800 "R.", count=1024) + at read_write.c:159 + #9 0x1006d603 in execute_syscall (syscall=4, args=0x5006ef08) + at syscall_kern.c:254 + #10 0x1006af87 in really_do_syscall (sig=12) at syscall_user.c:35 + #11 + #12 0x400dc8b0 in ?? () + #13 + #14 0x400dc8b0 in ?? () + #15 0x80545fd in ?? () + #16 0x804daae in ?? () + #17 0x8054334 in ?? () + #18 0x804d23e in ?? () + #19 0x8049632 in ?? () + #20 0x80491d2 in ?? () + #21 0x80596b5 in ?? () + (gdb) p (void *)1342179328 + $3 = (void *) 0x50000800 + + + + + + Going up the stack to the segv_handler frame and looking at where in + the code the access happened shows that it happened near line 110 of + block_dev.c: + + + + + + + + + + (gdb) up + #1 0x1007584d in __sleep (seconds=1000000) + at ../sysdeps/unix/sysv/linux/sleep.c:78 + ../sysdeps/unix/sysv/linux/sleep.c:78: No such file or directory. + (gdb) + #2 0x1006ce9a in stop () at user_util.c:191 + 191 while(1) sleep(1000000); + (gdb) + #3 0x1006bf88 in segv (address=1342179328, is_write=2) at trap_kern.c:31 + 31 KERN_UNTESTED(); + (gdb) + #4 0x1006c628 in segv_handler (sc=0x5006eaf8) at trap_user.c:174 + 174 segv(sc->cr2, sc->err & 2); + (gdb) p *sc + $1 = {gs = 0, __gsh = 0, fs = 0, __fsh = 0, es = 43, __esh = 0, ds = 43, + __dsh = 0, edi = 1342179328, esi = 134973440, ebp = 1342631484, + esp = 1342630864, ebx = 256, edx = 0, ecx = 256, eax = 1024, trapno = 14, + err = 6, eip = 268550834, cs = 35, __csh = 0, eflags = 66070, + esp_at_signal = 1342630864, ss = 43, __ssh = 0, fpstate = 0x0, oldmask = 0, + cr2 = 1342179328} + (gdb) p (void *)268550834 + $2 = (void *) 0x1001c2b2 + (gdb) i sym $2 + block_write + 1090 in section .text + (gdb) i line *$2 + Line 209 of "/home/dike/linux/2.3.26/um/include/asm/arch/string.h" + starts at address 0x1001c2a1 + and ends at 0x1001c2bf . + (gdb) i line *0x1001c2c0 + Line 110 of "block_dev.c" starts at address 0x1001c2bf + and ends at 0x1001c2e3 . + + + + + + Looking at the source shows that the fault happened during a call to + copy_to_user to copy the data into the kernel: + + + 107 count -= chars; + 108 copy_from_user(p,buf,chars); + 109 p += chars; + 110 buf += chars; + + + + + + p is the pointer which must contain 0x50000800, since buf contains + 0x80b8800 (frame 8 above). It is defined as: + + + p = offset + bh->b_data; + + + + + + I need to figure out what bh is, and it just so happens that bh is + passed as an argument to mark_buffer_uptodate and mark_buffer_dirty a + few lines later, so I do a little disassembly: + + + + + (gdb) disas 0x1001c2bf 0x1001c2e0 + Dump of assembler code from 0x1001c2bf to 0x1001c2d0: + 0x1001c2bf : addl %eax,0xc(%ebp) + 0x1001c2c2 : movl 0xfffffdd4(%ebp),%edx + 0x1001c2c8 : btsl $0x0,0x18(%edx) + 0x1001c2cd : btsl $0x1,0x18(%edx) + 0x1001c2d2 : sbbl %ecx,%ecx + 0x1001c2d4 : testl %ecx,%ecx + 0x1001c2d6 : jne 0x1001c2e3 + 0x1001c2d8 : pushl $0x0 + 0x1001c2da : pushl %edx + 0x1001c2db : call 0x1001819c <__mark_buffer_dirty> + End of assembler dump. + + + + + + At that point, bh is in %edx (address 0x1001c2da), which is calculated + at 0x1001c2c2 as %ebp + 0xfffffdd4, so I figure exactly what that is, + taking %ebp from the sigcontext_struct above: + + + (gdb) p (void *)1342631484 + $5 = (void *) 0x5006ee3c + (gdb) p 0x5006ee3c+0xfffffdd4 + $6 = 1342630928 + (gdb) p (void *)$6 + $7 = (void *) 0x5006ec10 + (gdb) p *((void **)$7) + $8 = (void *) 0x50100200 + + + + + + Now, I look at the structure to see what's in it, and particularly, + what its b_data field contains: + + + (gdb) p *((struct buffer_head *)0x50100200) + $13 = {b_next = 0x50289380, b_blocknr = 49405, b_size = 1024, b_list = 0, + b_dev = 15872, b_count = {counter = 1}, b_rdev = 15872, b_state = 24, + b_flushtime = 0, b_next_free = 0x501001a0, b_prev_free = 0x50100260, + b_this_page = 0x501001a0, b_reqnext = 0x0, b_pprev = 0x507fcf58, + b_data = 0x50000800 "", b_page = 0x50004000, + b_end_io = 0x10017f60 , b_dev_id = 0x0, + b_rsector = 98810, b_wait = {lock = , + task_list = {next = 0x50100248, prev = 0x50100248}, __magic = 1343226448, + __creator = 0}, b_kiobuf = 0x0} + + + + + + The b_data field is indeed 0x50000800, so the question becomes how + that happened. The rest of the structure looks fine, so this probably + is not a case of data corruption. It happened on purpose somehow. + + + The b_page field is a pointer to the page_struct representing the + 0x50000000 page. Looking at it shows the kernel's idea of the state + of that page: + + + + (gdb) p *$13.b_page + $17 = {list = {next = 0x50004a5c, prev = 0x100c5174}, mapping = 0x0, + index = 0, next_hash = 0x0, count = {counter = 1}, flags = 132, lru = { + next = 0x50008460, prev = 0x50019350}, wait = { + lock = , task_list = {next = 0x50004024, + prev = 0x50004024}, __magic = 1342193708, __creator = 0}, + pprev_hash = 0x0, buffers = 0x501002c0, virtual = 1342177280, + zone = 0x100c5160} + + + + + + Some sanity-checking: the virtual field shows the "virtual" address of + this page, which in this kernel is the same as its "physical" address, + and the page_struct itself should be mem_map[0], since it represents + the first page of memory: + + + + (gdb) p (void *)1342177280 + $18 = (void *) 0x50000000 + (gdb) p mem_map + $19 = (mem_map_t *) 0x50004000 + + + + + + These check out fine. + + + Now to check out the page_struct itself. In particular, the flags + field shows whether the page is considered free or not: + + + (gdb) p (void *)132 + $21 = (void *) 0x84 + + + + + + The "reserved" bit is the high bit, which is definitely not set, so + the kernel considers the signal stack page to be free and available to + be used. + + + At this point, I jump to conclusions and start looking at my early + boot code, because that's where that page is supposed to be reserved. + + + In my setup_arch procedure, I have the following code which looks just + fine: + + + + bootmap_size = init_bootmem(start_pfn, end_pfn - start_pfn); + free_bootmem(__pa(low_physmem) + bootmap_size, high_physmem - low_physmem); + + + + + + Two stack pages have already been allocated, and low_physmem points to + the third page, which is the beginning of free memory. + The init_bootmem call declares the entire memory to the boot memory + manager, which marks it all reserved. The free_bootmem call frees up + all of it, except for the first two pages. This looks correct to me. + + + So, I decide to see init_bootmem run and make sure that it is marking + those first two pages as reserved. I never get that far. + + + Stepping into init_bootmem, and looking at bootmem_map before looking + at what it contains shows the following: + + + + (gdb) p bootmem_map + $3 = (void *) 0x50000000 + + + + + + Aha! The light dawns. That first page is doing double duty as a + stack and as the boot memory map. The last thing that the boot memory + manager does is to free the pages used by its memory map, so this page + is getting freed even its marked as reserved. + + + The fix was to initialize the boot memory manager before allocating + those two stack pages, and then allocate them through the boot memory + manager. After doing this, and fixing a couple of subsequent buglets, + the stack corruption problem disappeared. + + + + + + 1133.. WWhhaatt ttoo ddoo wwhheenn UUMMLL ddooeessnn''tt wwoorrkk + + + + + 1133..11.. SSttrraannggee ccoommppiillaattiioonn eerrrroorrss wwhheenn yyoouu bbuuiilldd ffrroomm ssoouurrccee + + As of test11, it is necessary to have "ARCH=um" in the environment or + on the make command line for all steps in building UML, including + clean, distclean, or mrproper, config, menuconfig, or xconfig, dep, + and linux. If you forget for any of them, the i386 build seems to + contaminate the UML build. If this happens, start from scratch with + + + host% + make mrproper ARCH=um + + + + + and repeat the build process with ARCH=um on all the steps. + + + See ``Compiling the kernel and modules'' for more details. + + + Another cause of strange compilation errors is building UML in + /usr/src/linux. If you do this, the first thing you need to do is + clean up the mess you made. The /usr/src/linux/asm link will now + point to /usr/src/linux/asm-um. Make it point back to + /usr/src/linux/asm-i386. Then, move your UML pool someplace else and + build it there. Also see below, where a more specific set of symptoms + is described. + + + + 1133..33.. AA vvaarriieettyy ooff ppaanniiccss aanndd hhaannggss wwiitthh //ttmmpp oonn aa rreeiisseerrffss ffiilleessyyss-- + tteemm + + I saw this on reiserfs 3.5.21 and it seems to be fixed in 3.5.27. + Panics preceded by + + + Detaching pid nnnn + + + + are diagnostic of this problem. This is a reiserfs bug which causes a + thread to occasionally read stale data from a mmapped page shared with + another thread. The fix is to upgrade the filesystem or to have /tmp + be an ext2 filesystem. + + + + 1133..44.. TThhee ccoommppiillee ffaaiillss wwiitthh eerrrroorrss aabboouutt ccoonnfflliiccttiinngg ttyyppeess ffoorr + ''ooppeenn'',, ''dduupp'',, aanndd ''wwaaiittppiidd'' + + This happens when you build in /usr/src/linux. The UML build makes + the include/asm link point to include/asm-um. /usr/include/asm points + to /usr/src/linux/include/asm, so when that link gets moved, files + which need to include the asm-i386 versions of headers get the + incompatible asm-um versions. The fix is to move the include/asm link + back to include/asm-i386 and to do UML builds someplace else. + + + + 1133..55.. UUMMLL ddooeessnn''tt wwoorrkk wwhheenn //ttmmpp iiss aann NNFFSS ffiilleessyysstteemm + + This seems to be a similar situation with the ReiserFS problem above. + Some versions of NFS seems not to handle mmap correctly, which UML + depends on. The workaround is have /tmp be a non-NFS directory. + + + 1133..66.. UUMMLL hhaannggss oonn bboooott wwhheenn ccoommppiilleedd wwiitthh ggpprrooff ssuuppppoorrtt + + If you build UML with gprof support and, early in the boot, it does + this + + + kernel BUG at page_alloc.c:100! + + + + + you have a buggy gcc. You can work around the problem by removing + UM_FASTCALL from CFLAGS in arch/um/Makefile-i386. This will open up + another bug, but that one is fairly hard to reproduce. + + + + 1133..77.. ssyyssllooggdd ddiieess wwiitthh aa SSIIGGTTEERRMM oonn ssttaarrttuupp + + The exact boot error depends on the distribution that you're booting, + but Debian produces this: + + + /etc/rc2.d/S10sysklogd: line 49: 93 Terminated + start-stop-daemon --start --quiet --exec /sbin/syslogd -- $SYSLOGD + + + + + This is a syslogd bug. There's a race between a parent process + installing a signal handler and its child sending the signal. See + this uml-devel post for the details. + + + + 1133..88.. TTUUNN//TTAAPP nneettwwoorrkkiinngg ddooeessnn''tt wwoorrkk oonn aa 22..44 hhoosstt + + There are a couple of problems which were + name="pointed + out"> by Tim Robinson + + +o It doesn't work on hosts running 2.4.7 (or thereabouts) or earlier. + The fix is to upgrade to something more recent and then read the + next item. + + +o If you see + + + File descriptor in bad state + + + + when you bring up the device inside UML, you have a header mismatch + between the original kernel and the upgraded one. Make /usr/src/linux + point at the new headers. This will only be a problem if you build + uml_net yourself. + + + + 1133..99.. YYoouu ccaann nneettwwoorrkk ttoo tthhee hhoosstt bbuutt nnoott ttoo ootthheerr mmaacchhiinneess oonn tthhee + nneett + + If you can connect to the host, and the host can connect to UML, but + you cannot connect to any other machines, then you may need to enable + IP Masquerading on the host. Usually this is only experienced when + using private IP addresses (192.168.x.x or 10.x.x.x) for host/UML + networking, rather than the public address space that your host is + connected to. UML does not enable IP Masquerading, so you will need + to create a static rule to enable it: + + + host% + iptables -t nat -A POSTROUTING -o eth0 -j MASQUERADE + + + + + Replace eth0 with the interface that you use to talk to the rest of + the world. + + + Documentation on IP Masquerading, and SNAT, can be found at + www.netfilter.org . + + + If you can reach the local net, but not the outside Internet, then + that is usually a routing problem. The UML needs a default route: + + + UML# + route add default gw gateway IP + + + + + The gateway IP can be any machine on the local net that knows how to + reach the outside world. Usually, this is the host or the local net- + work's gateway. + + + Occasionally, we hear from someone who can reach some machines, but + not others on the same net, or who can reach some ports on other + machines, but not others. These are usually caused by strange + firewalling somewhere between the UML and the other box. You track + this down by running tcpdump on every interface the packets travel + over and see where they disappear. When you find a machine that takes + the packets in, but does not send them onward, that's the culprit. + + + + 1133..1100.. II hhaavvee nnoo rroooott aanndd II wwaanntt ttoo ssccrreeaamm + + Thanks to Birgit Wahlich for telling me about this strange one. It + turns out that there's a limit of six environment variables on the + kernel command line. When that limit is reached or exceeded, argument + processing stops, which means that the 'root=' argument that UML + usually adds is not seen. So, the filesystem has no idea what the + root device is, so it panics. + + + The fix is to put less stuff on the command line. Glomming all your + setup variables into one is probably the best way to go. + + + + 1133..1111.. UUMMLL bbuuiilldd ccoonnfflliicctt bbeettwweeeenn ppttrraaccee..hh aanndd uuccoonntteexxtt..hh + + On some older systems, /usr/include/asm/ptrace.h and + /usr/include/sys/ucontext.h define the same names. So, when they're + included together, the defines from one completely mess up the parsing + of the other, producing errors like: + /usr/include/sys/ucontext.h:47: parse error before + `10' + + + + + plus a pile of warnings. + + + This is a libc botch, which has since been fixed, and I don't see any + way around it besides upgrading. + + + + 1133..1122.. TThhee UUMMLL BBooggooMMiippss iiss eexxaaccttllyy hhaallff tthhee hhoosstt''ss BBooggooMMiippss + + On i386 kernels, there are two ways of running the loop that is used + to calculate the BogoMips rating, using the TSC if it's there or using + a one-instruction loop. The TSC produces twice the BogoMips as the + loop. UML uses the loop, since it has nothing resembling a TSC, and + will get almost exactly the same BogoMips as a host using the loop. + However, on a host with a TSC, its BogoMips will be double the loop + BogoMips, and therefore double the UML BogoMips. + + + + 1133..1133.. WWhheenn yyoouu rruunn UUMMLL,, iitt iimmmmeeddiiaatteellyy sseeggffaauullttss + + If the host is configured with the 2G/2G address space split, that's + why. See ``UML on 2G/2G hosts'' for the details on getting UML to + run on your host. + + + + 1133..1144.. xxtteerrmmss aappppeeaarr,, tthheenn iimmmmeeddiiaatteellyy ddiissaappppeeaarr + + If you're running an up to date kernel with an old release of + uml_utilities, the port-helper program will not work properly, so + xterms will exit straight after they appear. The solution is to + upgrade to the latest release of uml_utilities. Usually this problem + occurs when you have installed a packaged release of UML then compiled + your own development kernel without upgrading the uml_utilities from + the source distribution. + + + + 1133..1155.. AAnnyy ootthheerr ppaanniicc,, hhaanngg,, oorr ssttrraannggee bbeehhaavviioorr + + If you're seeing truly strange behavior, such as hangs or panics that + happen in random places, or you try running the debugger to see what's + happening and it acts strangely, then it could be a problem in the + host kernel. If you're not running a stock Linus or -ac kernel, then + try that. An early version of the preemption patch and a 2.4.10 SuSE + kernel have caused very strange problems in UML. + + + Otherwise, let me know about it. Send a message to one of the UML + mailing lists - either the developer list - user-mode-linux-devel at + lists dot sourceforge dot net (subscription info) or the user list - + user-mode-linux-user at lists dot sourceforge do net (subscription + info), whichever you prefer. Don't assume that everyone knows about + it and that a fix is imminent. + + + If you want to be super-helpful, read ``Diagnosing Problems'' and + follow the instructions contained therein. + 1144.. DDiiaaggnnoossiinngg PPrroobblleemmss + + + If you get UML to crash, hang, or otherwise misbehave, you should + report this on one of the project mailing lists, either the developer + list - user-mode-linux-devel at lists dot sourceforge dot net + (subscription info) or the user list - user-mode-linux-user at lists + dot sourceforge dot net (subscription info). When you do, it is + likely that I will want more information. So, it would be helpful to + read the stuff below, do whatever is applicable in your case, and + report the results to the list. + + + For any diagnosis, you're going to need to build a debugging kernel. + The binaries from this site aren't debuggable. If you haven't done + this before, read about ``Compiling the kernel and modules'' and + ``Kernel debugging'' UML first. + + + 1144..11.. CCaassee 11 :: NNoorrmmaall kkeerrnneell ppaanniiccss + + The most common case is for a normal thread to panic. To debug this, + you will need to run it under the debugger (add 'debug' to the command + line). An xterm will start up with gdb running inside it. Continue + it when it stops in start_kernel and make it crash. Now ^C gdb and + + + If the panic was a "Kernel mode fault", then there will be a segv + frame on the stack and I'm going to want some more information. The + stack might look something like this: + + + (UML gdb) backtrace + #0 0x1009bf76 in __sigprocmask (how=1, set=0x5f347940, oset=0x0) + at ../sysdeps/unix/sysv/linux/sigprocmask.c:49 + #1 0x10091411 in change_sig (signal=10, on=1) at process.c:218 + #2 0x10094785 in timer_handler (sig=26) at time_kern.c:32 + #3 0x1009bf38 in __restore () + at ../sysdeps/unix/sysv/linux/i386/sigaction.c:125 + #4 0x1009534c in segv (address=8, ip=268849158, is_write=2, is_user=0) + at trap_kern.c:66 + #5 0x10095c04 in segv_handler (sig=11) at trap_user.c:285 + #6 0x1009bf38 in __restore () + + + + + I'm going to want to see the symbol and line information for the value + of ip in the segv frame. In this case, you would do the following: + + + (UML gdb) i sym 268849158 + + + + + and + + + (UML gdb) i line *268849158 + + + + + The reason for this is the __restore frame right above the segv_han- + dler frame is hiding the frame that actually segfaulted. So, I have + to get that information from the faulting ip. + + + 1144..22.. CCaassee 22 :: TTrraacciinngg tthhrreeaadd ppaanniiccss + + The less common and more painful case is when the tracing thread + panics. In this case, the kernel debugger will be useless because it + needs a healthy tracing thread in order to work. The first thing to + do is get a backtrace from the tracing thread. This is done by + figuring out what its pid is, firing up gdb, and attaching it to that + pid. You can figure out the tracing thread pid by looking at the + first line of the console output, which will look like this: + + + tracing thread pid = 15851 + + + + + or by running ps on the host and finding the line that looks like + this: + + + jdike 15851 4.5 0.4 132568 1104 pts/0 S 21:34 0:05 ./linux [(tracing thread)] + + + + + If the panic was 'segfault in signals', then follow the instructions + above for collecting information about the location of the seg fault. + + + If the tracing thread flaked out all by itself, then send that + backtrace in and wait for our crack debugging team to fix the problem. + + + 1144..33.. CCaassee 33 :: TTrraacciinngg tthhrreeaadd ppaanniiccss ccaauusseedd bbyy ootthheerr tthhrreeaaddss + + However, there are cases where the misbehavior of another thread + caused the problem. The most common panic of this type is: + + + wait_for_stop failed to wait for to stop with + + + + + In this case, you'll need to get a backtrace from the process men- + tioned in the panic, which is complicated by the fact that the kernel + debugger is defunct and without some fancy footwork, another gdb can't + attach to it. So, this is how the fancy footwork goes: + + In a shell: + + + host% kill -STOP pid + + + + + Run gdb on the tracing thread as described in case 2 and do: + + + (host gdb) call detach(pid) + + + If you get a segfault, do it again. It always works the second time. + + Detach from the tracing thread and attach to that other thread: + + + (host gdb) detach + + + + + + + (host gdb) attach pid + + + + + If gdb hangs when attaching to that process, go back to a shell and + do: + + + host% + kill -CONT pid + + + + + And then get the backtrace: + + + (host gdb) backtrace + + + + + + 1144..44.. CCaassee 44 :: HHaannggss + + Hangs seem to be fairly rare, but they sometimes happen. When a hang + happens, we need a backtrace from the offending process. Run the + kernel debugger as described in case 1 and get a backtrace. If the + current process is not the idle thread, then send in the backtrace. + You can tell that it's the idle thread if the stack looks like this: + + + #0 0x100b1401 in __libc_nanosleep () + #1 0x100a2885 in idle_sleep (secs=10) at time.c:122 + #2 0x100a546f in do_idle () at process_kern.c:445 + #3 0x100a5508 in cpu_idle () at process_kern.c:471 + #4 0x100ec18f in start_kernel () at init/main.c:592 + #5 0x100a3e10 in start_kernel_proc (unused=0x0) at um_arch.c:71 + #6 0x100a383f in signal_tramp (arg=0x100a3dd8) at trap_user.c:50 + + + + + If this is the case, then some other process is at fault, and went to + sleep when it shouldn't have. Run ps on the host and figure out which + process should not have gone to sleep and stayed asleep. Then attach + to it with gdb and get a backtrace as described in case 3. + + + + + + + 1155.. TThhaannkkss + + + A number of people have helped this project in various ways, and this + page gives recognition where recognition is due. + + + If you're listed here and you would prefer a real link on your name, + or no link at all, instead of the despammed email address pseudo-link, + let me know. + + + If you're not listed here and you think maybe you should be, please + let me know that as well. I try to get everyone, but sometimes my + bookkeeping lapses and I forget about contributions. + + + 1155..11.. CCooddee aanndd DDooccuummeennttaattiioonn + + Rusty Russell - + + +o wrote the HOWTO + + +o prodded me into making this project official and putting it on + SourceForge + + +o came up with the way cool UML logo + + +o redid the config process + + + Peter Moulder - Fixed my config and build + processes, and added some useful code to the block driver + + + Bill Stearns - + + +o HOWTO updates + + +o lots of bug reports + + +o lots of testing + + +o dedicated a box (uml.ists.dartmouth.edu) to support UML development + + +o wrote the mkrootfs script, which allows bootable filesystems of + RPM-based distributions to be cranked out + + +o cranked out a large number of filesystems with said script + + + Jim Leu - Wrote the virtual ethernet driver + and associated usermode tools + + Lars Brinkhoff - Contributed the ptrace + proxy from his own project to allow easier + kernel debugging + + + Andrea Arcangeli - Redid some of the early boot + code so that it would work on machines with Large File Support + + + Chris Emerson - Did + the first UML port to Linux/ppc + + + Harald Welte - Wrote the multicast + transport for the network driver + + + Jorgen Cederlof - Added special file support to hostfs + + + Greg Lonnon - Changed the ubd driver + to allow it to layer a COW file on a shared read-only filesystem and + wrote the iomem emulation support + + + Henrik Nordstrom - Provided a variety + of patches, fixes, and clues + + + Lennert Buytenhek - Contributed various patches, a rewrite of the + network driver, the first implementation of the mconsole driver, and + did the bulk of the work needed to get SMP working again. + + + Yon Uriarte - Fixed the TUN/TAP network backend while I slept. + + + Adam Heath - Made a bunch of nice cleanups to the initialization code, + plus various other small patches. + + + Matt Zimmerman - Matt volunteered to be the UML Debian maintainer and + is doing a real nice job of it. He also noticed and fixed a number of + actually and potentially exploitable security holes in uml_net. Plus + the occasional patch. I like patches. + + + James McMechan - James seems to have taken over maintenance of the ubd + driver and is doing a nice job of it. + + + Chandan Kudige - wrote the umlgdb script which automates the reloading + of module symbols. + + + Steve Schmidtke - wrote the UML slirp transport and hostaudio drivers, + enabling UML processes to access audio devices on the host. He also + submitted patches for the slip transport and lots of other things. + + + David Coulson - + + +o Set up the usermodelinux.org site, + which is a great way of keeping the UML user community on top of + UML goings-on. + + +o Site documentation and updates + + +o Nifty little UML management daemon UMLd + + + +o Lots of testing and bug reports + + + + + 1155..22.. FFlluusshhiinngg oouutt bbuuggss + + + + +o Yuri Pudgorodsky + + +o Gerald Britton + + +o Ian Wehrman + + +o Gord Lamb + + +o Eugene Koontz + + +o John H. Hartman + + +o Anders Karlsson + + +o Daniel Phillips + + +o John Fremlin + + +o Rainer Burgstaller + + +o James Stevenson + + +o Matt Clay + + +o Cliff Jefferies + + +o Geoff Hoff + + +o Lennert Buytenhek + + +o Al Viro + + +o Frank Klingenhoefer + + +o Livio Baldini Soares + + +o Jon Burgess + + +o Petru Paler + + +o Paul + + +o Chris Reahard + + +o Sverker Nilsson + + +o Gong Su + + +o johan verrept + + +o Bjorn Eriksson + + +o Lorenzo Allegrucci + + +o Muli Ben-Yehuda + + +o David Mansfield + + +o Howard Goff + + +o Mike Anderson + + +o John Byrne + + +o Sapan J. Batia + + +o Iris Huang + + +o Jan Hudec + + +o Voluspa + + + + + 1155..33.. BBuugglleettss aanndd cclleeaann--uuppss + + + + +o Dave Zarzycki + + +o Adam Lazur + + +o Boria Feigin + + +o Brian J. Murrell + + +o JS + + +o Roman Zippel + + +o Wil Cooley + + +o Ayelet Shemesh + + +o Will Dyson + + +o Sverker Nilsson + + +o dvorak + + +o v.naga srinivas + + +o Shlomi Fish + + +o Roger Binns + + +o johan verrept + + +o MrChuoi + + +o Peter Cleve + + +o Vincent Guffens + + +o Nathan Scott + + +o Patrick Caulfield + + +o jbearce + + +o Catalin Marinas + + +o Shane Spencer + + +o Zou Min + + + +o Ryan Boder + + +o Lorenzo Colitti + + +o Gwendal Grignou + + +o Andre' Breiler + + +o Tsutomu Yasuda + + + + 1155..44.. CCaassee SSttuuddiieess + + + +o Jon Wright + + +o William McEwan + + +o Michael Richardson + + + + 1155..55.. OOtthheerr ccoonnttrriibbuuttiioonnss + + + Bill Carr made the Red Hat mkrootfs script + work with RH 6.2. + + Michael Jennings sent in some material which + is now gracing the top of the index page of this site. + + SGI (and more specifically Ralf Baechle ) gave me an account on oss.sgi.com + . The bandwidth there made it possible to + produce most of the filesystems available on the project download + page. + + Laurent Bonnaud took the old grotty + Debian filesystem that I've been distributing and updated it to 2.2. + It is now available by itself here. + + Rik van Riel gave me some ftp space on ftp.nl.linux.org so I can make + releases even when Sourceforge is broken. + + Rodrigo de Castro looked at my broken pte code and told me what was + wrong with it, letting me fix a long-standing (several weeks) and + serious set of bugs. + + Chris Reahard built a specialized root filesystem for running a DNS + server jailed inside UML. It's available from the download + page in the Jail + Filesystems section. + + + + + + + + + + + + diff --git a/Documentation/vm/cleancache.txt b/Documentation/vm/cleancache.txt new file mode 100644 index 000000000000..36c367c73084 --- /dev/null +++ b/Documentation/vm/cleancache.txt @@ -0,0 +1,278 @@ +MOTIVATION + +Cleancache is a new optional feature provided by the VFS layer that +potentially dramatically increases page cache effectiveness for +many workloads in many environments at a negligible cost. + +Cleancache can be thought of as a page-granularity victim cache for clean +pages that the kernel's pageframe replacement algorithm (PFRA) would like +to keep around, but can't since there isn't enough memory. So when the +PFRA "evicts" a page, it first attempts to use cleancache code to +put the data contained in that page into "transcendent memory", memory +that is not directly accessible or addressable by the kernel and is +of unknown and possibly time-varying size. + +Later, when a cleancache-enabled filesystem wishes to access a page +in a file on disk, it first checks cleancache to see if it already +contains it; if it does, the page of data is copied into the kernel +and a disk access is avoided. + +Transcendent memory "drivers" for cleancache are currently implemented +in Xen (using hypervisor memory) and zcache (using in-kernel compressed +memory) and other implementations are in development. + +FAQs are included below. + +IMPLEMENTATION OVERVIEW + +A cleancache "backend" that provides transcendent memory registers itself +to the kernel's cleancache "frontend" by calling cleancache_register_ops, +passing a pointer to a cleancache_ops structure with funcs set appropriately. +Note that cleancache_register_ops returns the previous settings so that +chaining can be performed if desired. The functions provided must conform to +certain semantics as follows: + +Most important, cleancache is "ephemeral". Pages which are copied into +cleancache have an indefinite lifetime which is completely unknowable +by the kernel and so may or may not still be in cleancache at any later time. +Thus, as its name implies, cleancache is not suitable for dirty pages. +Cleancache has complete discretion over what pages to preserve and what +pages to discard and when. + +Mounting a cleancache-enabled filesystem should call "init_fs" to obtain a +pool id which, if positive, must be saved in the filesystem's superblock; +a negative return value indicates failure. A "put_page" will copy a +(presumably about-to-be-evicted) page into cleancache and associate it with +the pool id, a file key, and a page index into the file. (The combination +of a pool id, a file key, and an index is sometimes called a "handle".) +A "get_page" will copy the page, if found, from cleancache into kernel memory. +A "flush_page" will ensure the page no longer is present in cleancache; +a "flush_inode" will flush all pages associated with the specified file; +and, when a filesystem is unmounted, a "flush_fs" will flush all pages in +all files specified by the given pool id and also surrender the pool id. + +An "init_shared_fs", like init_fs, obtains a pool id but tells cleancache +to treat the pool as shared using a 128-bit UUID as a key. On systems +that may run multiple kernels (such as hard partitioned or virtualized +systems) that may share a clustered filesystem, and where cleancache +may be shared among those kernels, calls to init_shared_fs that specify the +same UUID will receive the same pool id, thus allowing the pages to +be shared. Note that any security requirements must be imposed outside +of the kernel (e.g. by "tools" that control cleancache). Or a +cleancache implementation can simply disable shared_init by always +returning a negative value. + +If a get_page is successful on a non-shared pool, the page is flushed (thus +making cleancache an "exclusive" cache). On a shared pool, the page +is NOT flushed on a successful get_page so that it remains accessible to +other sharers. The kernel is responsible for ensuring coherency between +cleancache (shared or not), the page cache, and the filesystem, using +cleancache flush operations as required. + +Note that cleancache must enforce put-put-get coherency and get-get +coherency. For the former, if two puts are made to the same handle but +with different data, say AAA by the first put and BBB by the second, a +subsequent get can never return the stale data (AAA). For get-get coherency, +if a get for a given handle fails, subsequent gets for that handle will +never succeed unless preceded by a successful put with that handle. + +Last, cleancache provides no SMP serialization guarantees; if two +different Linux threads are simultaneously putting and flushing a page +with the same handle, the results are indeterminate. Callers must +lock the page to ensure serial behavior. + +CLEANCACHE PERFORMANCE METRICS + +Cleancache monitoring is done by sysfs files in the +/sys/kernel/mm/cleancache directory. The effectiveness of cleancache +can be measured (across all filesystems) with: + +succ_gets - number of gets that were successful +failed_gets - number of gets that failed +puts - number of puts attempted (all "succeed") +flushes - number of flushes attempted + +A backend implementatation may provide additional metrics. + +FAQ + +1) Where's the value? (Andrew Morton) + +Cleancache provides a significant performance benefit to many workloads +in many environments with negligible overhead by improving the +effectiveness of the pagecache. Clean pagecache pages are +saved in transcendent memory (RAM that is otherwise not directly +addressable to the kernel); fetching those pages later avoids "refaults" +and thus disk reads. + +Cleancache (and its sister code "frontswap") provide interfaces for +this transcendent memory (aka "tmem"), which conceptually lies between +fast kernel-directly-addressable RAM and slower DMA/asynchronous devices. +Disallowing direct kernel or userland reads/writes to tmem +is ideal when data is transformed to a different form and size (such +as with compression) or secretly moved (as might be useful for write- +balancing for some RAM-like devices). Evicted page-cache pages (and +swap pages) are a great use for this kind of slower-than-RAM-but-much- +faster-than-disk transcendent memory, and the cleancache (and frontswap) +"page-object-oriented" specification provides a nice way to read and +write -- and indirectly "name" -- the pages. + +In the virtual case, the whole point of virtualization is to statistically +multiplex physical resources across the varying demands of multiple +virtual machines. This is really hard to do with RAM and efforts to +do it well with no kernel change have essentially failed (except in some +well-publicized special-case workloads). Cleancache -- and frontswap -- +with a fairly small impact on the kernel, provide a huge amount +of flexibility for more dynamic, flexible RAM multiplexing. +Specifically, the Xen Transcendent Memory backend allows otherwise +"fallow" hypervisor-owned RAM to not only be "time-shared" between multiple +virtual machines, but the pages can be compressed and deduplicated to +optimize RAM utilization. And when guest OS's are induced to surrender +underutilized RAM (e.g. with "self-ballooning"), page cache pages +are the first to go, and cleancache allows those pages to be +saved and reclaimed if overall host system memory conditions allow. + +And the identical interface used for cleancache can be used in +physical systems as well. The zcache driver acts as a memory-hungry +device that stores pages of data in a compressed state. And +the proposed "RAMster" driver shares RAM across multiple physical +systems. + +2) Why does cleancache have its sticky fingers so deep inside the + filesystems and VFS? (Andrew Morton and Christoph Hellwig) + +The core hooks for cleancache in VFS are in most cases a single line +and the minimum set are placed precisely where needed to maintain +coherency (via cleancache_flush operations) between cleancache, +the page cache, and disk. All hooks compile into nothingness if +cleancache is config'ed off and turn into a function-pointer- +compare-to-NULL if config'ed on but no backend claims the ops +functions, or to a compare-struct-element-to-negative if a +backend claims the ops functions but a filesystem doesn't enable +cleancache. + +Some filesystems are built entirely on top of VFS and the hooks +in VFS are sufficient, so don't require an "init_fs" hook; the +initial implementation of cleancache didn't provide this hook. +But for some filesystems (such as btrfs), the VFS hooks are +incomplete and one or more hooks in fs-specific code are required. +And for some other filesystems, such as tmpfs, cleancache may +be counterproductive. So it seemed prudent to require a filesystem +to "opt in" to use cleancache, which requires adding a hook in +each filesystem. Not all filesystems are supported by cleancache +only because they haven't been tested. The existing set should +be sufficient to validate the concept, the opt-in approach means +that untested filesystems are not affected, and the hooks in the +existing filesystems should make it very easy to add more +filesystems in the future. + +The total impact of the hooks to existing fs and mm files is only +about 40 lines added (not counting comments and blank lines). + +3) Why not make cleancache asynchronous and batched so it can + more easily interface with real devices with DMA instead + of copying each individual page? (Minchan Kim) + +The one-page-at-a-time copy semantics simplifies the implementation +on both the frontend and backend and also allows the backend to +do fancy things on-the-fly like page compression and +page deduplication. And since the data is "gone" (copied into/out +of the pageframe) before the cleancache get/put call returns, +a great deal of race conditions and potential coherency issues +are avoided. While the interface seems odd for a "real device" +or for real kernel-addressable RAM, it makes perfect sense for +transcendent memory. + +4) Why is non-shared cleancache "exclusive"? And where is the + page "flushed" after a "get"? (Minchan Kim) + +The main reason is to free up space in transcendent memory and +to avoid unnecessary cleancache_flush calls. If you want inclusive, +the page can be "put" immediately following the "get". If +put-after-get for inclusive becomes common, the interface could +be easily extended to add a "get_no_flush" call. + +The flush is done by the cleancache backend implementation. + +5) What's the performance impact? + +Performance analysis has been presented at OLS'09 and LCA'10. +Briefly, performance gains can be significant on most workloads, +especially when memory pressure is high (e.g. when RAM is +overcommitted in a virtual workload); and because the hooks are +invoked primarily in place of or in addition to a disk read/write, +overhead is negligible even in worst case workloads. Basically +cleancache replaces I/O with memory-copy-CPU-overhead; on older +single-core systems with slow memory-copy speeds, cleancache +has little value, but in newer multicore machines, especially +consolidated/virtualized machines, it has great value. + +6) How do I add cleancache support for filesystem X? (Boaz Harrash) + +Filesystems that are well-behaved and conform to certain +restrictions can utilize cleancache simply by making a call to +cleancache_init_fs at mount time. Unusual, misbehaving, or +poorly layered filesystems must either add additional hooks +and/or undergo extensive additional testing... or should just +not enable the optional cleancache. + +Some points for a filesystem to consider: + +- The FS should be block-device-based (e.g. a ram-based FS such + as tmpfs should not enable cleancache) +- To ensure coherency/correctness, the FS must ensure that all + file removal or truncation operations either go through VFS or + add hooks to do the equivalent cleancache "flush" operations +- To ensure coherency/correctness, either inode numbers must + be unique across the lifetime of the on-disk file OR the + FS must provide an "encode_fh" function. +- The FS must call the VFS superblock alloc and deactivate routines + or add hooks to do the equivalent cleancache calls done there. +- To maximize performance, all pages fetched from the FS should + go through the do_mpag_readpage routine or the FS should add + hooks to do the equivalent (cf. btrfs) +- Currently, the FS blocksize must be the same as PAGESIZE. This + is not an architectural restriction, but no backends currently + support anything different. +- A clustered FS should invoke the "shared_init_fs" cleancache + hook to get best performance for some backends. + +7) Why not use the KVA of the inode as the key? (Christoph Hellwig) + +If cleancache would use the inode virtual address instead of +inode/filehandle, the pool id could be eliminated. But, this +won't work because cleancache retains pagecache data pages +persistently even when the inode has been pruned from the +inode unused list, and only flushes the data page if the file +gets removed/truncated. So if cleancache used the inode kva, +there would be potential coherency issues if/when the inode +kva is reused for a different file. Alternately, if cleancache +flushed the pages when the inode kva was freed, much of the value +of cleancache would be lost because the cache of pages in cleanache +is potentially much larger than the kernel pagecache and is most +useful if the pages survive inode cache removal. + +8) Why is a global variable required? + +The cleancache_enabled flag is checked in all of the frequently-used +cleancache hooks. The alternative is a function call to check a static +variable. Since cleancache is enabled dynamically at runtime, systems +that don't enable cleancache would suffer thousands (possibly +tens-of-thousands) of unnecessary function calls per second. So the +global variable allows cleancache to be enabled by default at compile +time, but have insignificant performance impact when cleancache remains +disabled at runtime. + +9) Does cleanache work with KVM? + +The memory model of KVM is sufficiently different that a cleancache +backend may have less value for KVM. This remains to be tested, +especially in an overcommitted system. + +10) Does cleancache work in userspace? It sounds useful for + memory hungry caches like web browsers. (Jamie Lokier) + +No plans yet, though we agree it sounds useful, at least for +apps that bypass the page cache (e.g. O_DIRECT). + +Last updated: Dan Magenheimer, April 13 2011 diff --git a/Documentation/vm/hwpoison.txt b/Documentation/vm/hwpoison.txt index 12f9ba20ccb7..550068466605 100644 --- a/Documentation/vm/hwpoison.txt +++ b/Documentation/vm/hwpoison.txt @@ -129,12 +129,12 @@ Limit injection to pages owned by memgroup. Specified by inode number of the memcg. Example: - mkdir /cgroup/hwpoison + mkdir /sys/fs/cgroup/mem/hwpoison usemem -m 100 -s 1000 & - echo `jobs -p` > /cgroup/hwpoison/tasks + echo `jobs -p` > /sys/fs/cgroup/mem/hwpoison/tasks - memcg_ino=$(ls -id /cgroup/hwpoison | cut -f1 -d' ') + memcg_ino=$(ls -id /sys/fs/cgroup/mem/hwpoison | cut -f1 -d' ') echo $memcg_ino > /debug/hwpoison/corrupt-filter-memcg page-types -p `pidof init` --hwpoison # shall do nothing diff --git a/Documentation/vm/locking b/Documentation/vm/locking index 25fadb448760..f61228bd6395 100644 --- a/Documentation/vm/locking +++ b/Documentation/vm/locking @@ -66,7 +66,7 @@ in some cases it is not really needed. Eg, vm_start is modified by expand_stack(), it is hard to come up with a destructive scenario without having the vmlist protection in this case. -The page_table_lock nests with the inode i_mmap_lock and the kmem cache +The page_table_lock nests with the inode i_mmap_mutex and the kmem cache c_spinlock spinlocks. This is okay, since the kmem code asks for pages after dropping c_spinlock. The page_table_lock also nests with pagecache_lock and pagemap_lru_lock spinlocks, and no code asks for memory with these locks diff --git a/Documentation/workqueue.txt b/Documentation/workqueue.txt index 01c513fac40e..a0b577de918f 100644 --- a/Documentation/workqueue.txt +++ b/Documentation/workqueue.txt @@ -12,6 +12,7 @@ CONTENTS 4. Application Programming Interface (API) 5. Example Execution Scenarios 6. Guidelines +7. Debugging 1. Introduction @@ -379,3 +380,42 @@ If q1 has WQ_CPU_INTENSIVE set, * Unless work items are expected to consume a huge amount of CPU cycles, using a bound wq is usually beneficial due to the increased level of locality in wq operations and work item execution. + + +7. Debugging + +Because the work functions are executed by generic worker threads +there are a few tricks needed to shed some light on misbehaving +workqueue users. + +Worker threads show up in the process list as: + +root 5671 0.0 0.0 0 0 ? S 12:07 0:00 [kworker/0:1] +root 5672 0.0 0.0 0 0 ? S 12:07 0:00 [kworker/1:2] +root 5673 0.0 0.0 0 0 ? S 12:12 0:00 [kworker/0:0] +root 5674 0.0 0.0 0 0 ? S 12:13 0:00 [kworker/1:0] + +If kworkers are going crazy (using too much cpu), there are two types +of possible problems: + + 1. Something beeing scheduled in rapid succession + 2. A single work item that consumes lots of cpu cycles + +The first one can be tracked using tracing: + + $ echo workqueue:workqueue_queue_work > /sys/kernel/debug/tracing/set_event + $ cat /sys/kernel/debug/tracing/trace_pipe > out.txt + (wait a few secs) + ^C + +If something is busy looping on work queueing, it would be dominating +the output and the offender can be determined with the work item +function. + +For the second type of problems it should be possible to just check +the stack trace of the offending worker thread. + + $ cat /proc/THE_OFFENDING_KWORKER/stack + +The work item's function should be trivially visible in the stack +trace. diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt index 092e596a1301..c54b4f503e2a 100644 --- a/Documentation/x86/x86_64/boot-options.txt +++ b/Documentation/x86/x86_64/boot-options.txt @@ -206,7 +206,7 @@ IOMMU (input/output memory management unit) (e.g. because you have < 3 GB memory). Kernel boot message: "PCI-DMA: Disabling IOMMU" - 2. : AMD GART based hardware IOMMU. + 2. : AMD GART based hardware IOMMU. Kernel boot message: "PCI-DMA: using GART IOMMU" 3. : Software IOMMU implementation. Used diff --git a/Documentation/zh_CN/email-clients.txt b/Documentation/zh_CN/email-clients.txt new file mode 100644 index 000000000000..5d65e323d060 --- /dev/null +++ b/Documentation/zh_CN/email-clients.txt @@ -0,0 +1,210 @@ +锘?Chinese translated version of Documentation/email-clients.txt + +If you have any comment or update to the content, please contact the +original document maintainer directly. However, if you have a problem +communicating in English you can also ask the Chinese maintainer for +help. Contact the Chinese maintainer if this translation is outdated +or if there is a problem with the translation. + +Chinese maintainer: Harry Wei +--------------------------------------------------------------------- +Documentation/email-clients.txt ???涓????缈昏?? + +濡??????宠??璁烘????存?版???????????瀹癸??璇风?存?ヨ??绯诲?????妗g??缁存?よ?????濡????浣?浣跨?ㄨ?辨?? +浜ゆ???????伴?剧??璇?锛?涔????浠ュ??涓???????缁存?よ??姹???┿??濡???????缈昏????存?颁???????舵?????缈? +璇?瀛???ㄩ??棰?锛?璇疯??绯讳腑??????缁存?よ????? + +涓???????缁存?よ??锛? 璐惧??濞? Harry Wei +涓???????缈昏?????锛? 璐惧??濞? Harry Wei +涓?????????¤?????锛? Yinglin Luan + Xiaochen Wang + yaxinsn + +浠ヤ??涓烘?f?? +--------------------------------------------------------------------- + +Linux???浠跺?㈡?风?????缃?淇℃?? +====================================================================== + +?????????缃? +---------------------------------------------------------------------- +Linux?????歌ˉ涓???????杩????浠惰?????浜ょ??锛????濂芥??琛ヤ??浣?涓洪??浠朵????????宓?????????????浜?缁存?よ?? +??ユ?堕??浠讹??浣???????浠剁?????瀹规?煎??搴?璇ユ??"text/plain"?????惰??锛????浠朵????????涓?璧???????锛? +???涓鸿??浼?浣胯ˉ涓????寮???ㄩ?ㄥ????ㄨ??璁鸿??绋?涓???????寰???伴?俱?? + +??ㄦ?ュ?????Linux?????歌ˉ涓???????浠跺?㈡?风????ㄥ?????琛ヤ????跺??璇ュ??浜?????????????濮???舵?????渚?濡?锛? +浠?浠?涓???芥?瑰?????????????ゅ?惰〃绗???????绌烘?硷???????虫????ㄦ??涓?琛????寮?澶存?????缁?灏俱?? + +涓?瑕????杩?"format=flowed"妯″????????琛ヤ?????杩???蜂??寮?璧蜂?????棰????浠ュ?????瀹崇?????琛???? + +涓?瑕?璁╀????????浠跺?㈡?风??杩?琛??????ㄦ?㈣?????杩???蜂??浼???村??浣????琛ヤ????? + +???浠跺?㈡?风??涓???芥?瑰???????????瀛?绗????缂??????瑰?????瑕??????????琛ヤ???????芥??ASCII??????UTF-8缂??????瑰??锛? +濡????浣?浣跨??UTF-8缂??????瑰???????????浠讹????d??浣?灏?浼???垮??涓?浜??????藉????????瀛?绗???????棰???? + +???浠跺?㈡?风??搴?璇ュ舰???骞朵??淇???? References: ?????? In-Reply-To: ???棰?锛???d?? +???浠惰??棰?灏变??浼?涓??????? + +澶???剁??甯?(?????????璐寸??甯?)???甯镐????界?ㄤ??琛ヤ??锛????涓哄?惰〃绗?浼?杞????涓虹┖??笺??浣跨??xclipboard, xclip +??????xcutsel涔?璁稿??浠ワ??浣???????濂芥??璇?涓?涓?????????垮??浣跨?ㄥ????剁??甯???? + +涓?瑕???ㄤ娇???PGP/GPG缃插????????浠朵腑??????琛ヤ?????杩???蜂??浣垮??寰?澶???????涓???借?诲??????????ㄤ??浣????琛ヤ????? +锛?杩?涓????棰?搴?璇ユ?????浠ヤ慨澶????锛? + +??ㄧ???????搁??浠跺??琛ㄥ?????琛ヤ??涔????锛?缁????宸卞?????涓?涓?琛ヤ?????涓?涓???????涓绘??锛?淇?瀛???ユ?跺?扮?? +???浠讹??灏?琛ヤ?????'patch'??戒护???涓?锛?濡??????????浜?锛????缁??????搁??浠跺??琛ㄥ???????? + + +涓?浜????浠跺?㈡?风?????绀? +---------------------------------------------------------------------- +杩????缁???轰??浜?璇?缁????MUA???缃????绀猴?????浠ョ?ㄤ??缁?Linux?????稿?????琛ヤ?????杩?浜?骞朵???????虫?? +?????????杞?浠跺?????缃???荤????? + +璇存??锛? +TUI = 浠ユ?????涓哄?虹???????ㄦ?锋?ュ?? +GUI = ??惧舰?????㈢?ㄦ?锋?ュ?? + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Alpine (TUI) + +???缃????椤癸?? +???"Sending Preferences"??ㄥ??锛? + +- "Do Not Send Flowed Text"蹇?椤诲????? +- "Strip Whitespace Before Sending"蹇?椤诲?抽?? + +褰???????浠舵?讹????????搴?璇ユ?惧?ㄨˉ涓?浼???虹?扮????版?癸????跺?????涓?CTRL-R缁???????锛?浣挎??瀹???? +琛ヤ?????浠跺????ュ?伴??浠朵腑??? + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Evolution (GUI) + +涓?浜?寮????????????????浣跨?ㄥ????????琛ヤ?? + +褰??????╅??浠堕??椤癸??Preformat + 浠?Format->Heading->Preformatted (Ctrl-7)??????宸ュ?锋?? + +??跺??浣跨??锛? + Insert->Text File... (Alt-n x)?????ヨˉ涓????浠躲?? + +浣?杩????浠?"diff -Nru old.c new.c | xclip"锛???????Preformat锛???跺??浣跨?ㄤ腑??撮??杩?琛?绮?甯???? + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Kmail (GUI) + +涓?浜?寮????????????????浣跨?ㄥ????????琛ヤ????? + +榛?璁よ?剧疆涓?涓?HTML??煎??????????????锛?涓?瑕??????ㄥ????? + +褰?涔????涓?灏????浠剁????跺??锛???ㄩ??椤逛?????涓?瑕??????╄????ㄦ?㈣????????涓????缂虹?瑰氨???浣???ㄩ??浠朵腑杈???ョ??浠讳???????? +??戒??浼?琚??????ㄦ?㈣??锛????姝や??蹇?椤诲?ㄥ?????琛ヤ??涔?????????ㄦ?㈣????????绠?????????规??灏辨???????ㄨ????ㄦ?㈣????ヤ功??????浠讹?? +??跺?????瀹?淇?瀛?涓鸿??绋裤??涓????浣???ㄨ??绋夸腑???娆℃??寮?瀹?锛?瀹?宸茬????ㄩ?ㄨ????ㄦ?㈣??浜?锛???d??浣???????浠惰?界?舵病??? +?????╄????ㄦ?㈣??锛?浣????杩?涓?浼?澶卞?诲凡???????????ㄦ?㈣????? + +??ㄩ??浠剁??搴????锛??????ヨˉ涓?涔????锛???句??甯哥?ㄧ??琛ヤ??瀹????绗?锛?涓?涓?杩?瀛????(---)??? + +??跺?????"Message"????????$??锛??????╂????ユ??浠讹????ョ????????浣????琛ヤ?????浠躲??杩????涓?涓?棰?澶???????椤癸??浣????浠? +???杩?瀹????缃?浣???????浠跺缓绔?宸ュ?锋????????锛?杩????浠ュ甫涓?"insert file"??炬????? + +浣????浠ュ????ㄥ?伴??杩?GPG???璁伴??浠讹??浣???????宓?琛ヤ?????濂戒??瑕?浣跨??GPG???璁板??浠????浣?涓哄??宓??????????绛惧??琛ヤ??锛? +褰?浠?GPG涓???????7浣?缂??????朵??浣夸??浠?????????村??澶??????? + +濡????浣????瑕?浠ラ??浠剁??褰㈠????????琛ヤ??锛???d??灏卞?抽????瑰?婚??浠讹????跺?????涓?灞???э??绐????"Suggest automatic +display"锛?杩???峰??宓????浠舵?村?规??璁╄?昏???????般?? + +褰?浣?瑕?淇?瀛?灏?瑕?????????????宓???????琛ヤ??锛?浣????浠ヤ??娑???????琛ㄧ????奸????╁?????琛ヤ????????浠讹????跺????冲?婚????? +"save as"???浣????浠ヤ娇??ㄤ??涓?娌℃????存?圭????????琛ヤ????????浠讹??濡????瀹????浠ユ?g‘???褰㈠??缁???????褰?浣?姝g????ㄥ?? +???宸辩??绐???d??涓?瀵????锛???f?舵病??????椤瑰??浠ヤ??瀛????浠?--宸茬?????涓?涓?杩???风??bug琚?姹???ュ?颁??kmail???bugzilla +骞朵??甯????杩?灏?浼?琚?澶??????????浠舵??浠ュ?????瀵规??涓???ㄦ?峰??璇诲???????????琚?淇?瀛????锛????浠ュ?????浣???虫?????浠跺????跺?板?朵????版?癸?? +浣?涓?寰?涓????浠?浠????????????逛负缁?????????翠?????璇汇?? + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Lotus Notes (GUI) + +涓?瑕?浣跨?ㄥ????? + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Mutt (TUI) + +寰?澶?Linux寮????浜哄??浣跨??mutt瀹㈡?风??锛????浠ヨ?????瀹????瀹?宸ヤ????????甯告??浜???? + +Mutt涓????甯?缂?杈????锛????浠ヤ??绠′??浣跨?ㄤ??涔?缂?杈???ㄩ?戒??搴?璇ュ甫????????ㄦ??琛????澶у????扮??杈???ㄩ?藉甫??? +涓?涓?"insert file"???椤癸??瀹????浠ラ??杩?涓???瑰?????浠跺??瀹圭????瑰???????ユ??浠躲?? + +'vim'浣?涓?mutt???缂?杈????锛? + set editor="vi" + + 濡????浣跨??xclip锛???插?ヤ互涓???戒护 + :set paste + ???涓????涔??????????shift-insert??????浣跨?? + :r filename + +濡??????宠?????琛ヤ??浣?涓哄??宓?????????? +(a)ttach宸ヤ?????寰?濂斤??涓?甯????"set paste"??? + +???缃????椤癸?? +瀹?搴?璇ヤ互榛?璁よ?剧疆???褰㈠??宸ヤ????? +??惰??锛????"send_charset"璁剧疆涓?"us-ascii::utf-8"涔????涓?涓?涓???????涓绘????? + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Pine (TUI) + +Pine杩???绘??涓?浜?绌烘?煎????????棰?锛?浣????杩?浜???板?ㄥ??璇ラ?借??淇?澶?浜???? + +濡???????浠ワ??璇蜂娇???alpine(pine???缁ф?胯??) + +???缃????椤癸?? +- ???杩?????????????瑕?娑???ゆ??绋??????? +- "no-strip-whitespace-before-send"???椤逛????????瑕??????? + + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Sylpheed (GUI) + +- ???宓??????????浠ュ??濂界??宸ヤ??锛???????浣跨?ㄩ??浠讹????? +- ???璁镐娇??ㄥ????ㄧ??缂?杈???ㄣ?? +- 瀵逛?????褰?杈?澶???堕??甯告????? +- 濡???????杩?non-SSL杩???ワ?????娉?浣跨??TLS SMTP????????? +- ??ㄧ?????绐???d腑???涓?涓?寰??????ㄧ??ruler bar??? +- 缁???板?????涓?娣诲????板??灏变??浼?姝g‘???浜?瑙f?剧ず?????? + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Thunderbird (GUI) + +榛?璁ゆ????典??锛?thunderbird寰?瀹规??????????????锛?浣????杩????涓?浜???规?????浠ュ己??跺?????寰???村ソ??? + +- ??ㄧ?ㄦ?峰????疯?剧疆???锛?缁???????瀵诲??锛?涓?瑕???????"Compose messages in HTML format"??? + +- 缂?杈?浣????Thunderbird???缃?璁剧疆??ヤ娇瀹?涓?瑕????琛?浣跨??锛?user_pref("mailnews.wraplength", 0); + +- 缂?杈?浣????Thunderbird???缃?璁剧疆锛?浣垮??涓?瑕?浣跨??"format=flowed"??煎??锛?user_pref("mailnews. + send_plaintext_flowed", false); + +- 浣????瑕?浣?Thunderbird???涓洪???????煎????瑰??锛? + 濡????榛?璁ゆ????典??浣?涔??????????HTML??煎??锛???d?????寰???俱??浠?浠?浠????棰???????涓????妗?涓???????"Preformat"??煎????? + 濡????榛?璁ゆ????典??浣?涔??????????????????煎??锛?浣?涓?寰????瀹???逛负HTML??煎??锛?浠?浠?浣?涓轰??娆℃?х??锛???ヤ功?????扮??娑????锛? + ??跺??寮哄?朵娇瀹??????版???????煎??锛???????瀹?灏变?????琛????瑕?瀹???板??锛???ㄥ??淇$????炬??涓?浣跨??shift?????ヤ娇瀹????涓?HTML + ??煎??锛???跺?????棰???????涓????妗?涓???????"Preformat"??煎????? + +- ???璁镐娇??ㄥ????ㄧ??缂?杈????锛? + ???瀵?Thunderbird???琛ヤ?????绠?????????规??灏辨??浣跨?ㄤ??涓?"external editor"??╁??锛???跺??浣跨?ㄤ????????娆㈢?? + $EDITOR??ヨ?诲???????????骞惰ˉ涓???版?????涓????瑕?瀹???板??锛????浠ヤ??杞藉苟涓?瀹?瑁?杩?涓???╁??锛???跺??娣诲??涓?涓?浣跨?ㄥ????? + ??????View->Toolbars->Customize...??????褰?浣?涔????淇℃???????跺??浠?浠???瑰?诲??灏卞??浠ヤ????? + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +TkRat (GUI) + +???浠ヤ娇??ㄥ?????浣跨??"Insert file..."??????澶???ㄧ??缂?杈???ㄣ?? + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Gmail (Web GUI) + +涓?瑕?浣跨?ㄥ????????琛ヤ????? + +Gmail缃?椤靛?㈡?风???????ㄥ?版????惰〃绗?杞????涓虹┖??笺?? + +??界?跺?惰〃绗?杞????涓虹┖??奸??棰????浠ヨ??澶???ㄧ??杈???ㄨВ??筹???????跺??杩?浼?浣跨?ㄥ??杞???㈣?????姣?琛???????涓?78涓?瀛?绗???? + +???涓?涓????棰????Gmail杩?浼????浠讳??涓????ASCII???瀛?绗????淇℃????逛负base64缂???????瀹????涓?瑗垮????????娆ф床浜虹?????瀛???? + + ### diff --git a/MAINTAINERS b/MAINTAINERS index ec3600306289..f0358cd91de3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -151,6 +151,7 @@ S: Maintained F: drivers/net/hamradio/6pack.c 8169 10/100/1000 GIGABIT ETHERNET DRIVER +M: Realtek linux nic maintainers M: Francois Romieu L: netdev@vger.kernel.org S: Maintained @@ -222,10 +223,8 @@ S: Maintained F: drivers/platform/x86/acerhdf.c ACER WMI LAPTOP EXTRAS -M: Carlos Corbacho -L: aceracpi@googlegroups.com (subscribers-only) +M: Joey Lee L: platform-driver-x86@vger.kernel.org -W: http://code.google.com/p/aceracpi S: Maintained F: drivers/platform/x86/acer-wmi.c @@ -270,10 +269,8 @@ S: Supported F: drivers/acpi/video.c ACPI WMI DRIVER -M: Carlos Corbacho L: platform-driver-x86@vger.kernel.org -W: http://www.lesswatts.org/projects/acpi/ -S: Maintained +S: Orphan F: drivers/platform/x86/wmi.c AD1889 ALSA SOUND DRIVER @@ -286,35 +283,35 @@ F: sound/pci/ad1889.* AD525X ANALOG DEVICES DIGITAL POTENTIOMETERS DRIVER M: Michael Hennerich -L: device-driver-devel@blackfin.uclinux.org +L: device-drivers-devel@blackfin.uclinux.org W: http://wiki.analog.com/AD5254 S: Supported F: drivers/misc/ad525x_dpot.c AD5398 CURRENT REGULATOR DRIVER (AD5398/AD5821) M: Michael Hennerich -L: device-driver-devel@blackfin.uclinux.org +L: device-drivers-devel@blackfin.uclinux.org W: http://wiki.analog.com/AD5398 S: Supported F: drivers/regulator/ad5398.c AD714X CAPACITANCE TOUCH SENSOR DRIVER (AD7142/3/7/8/7A) M: Michael Hennerich -L: device-driver-devel@blackfin.uclinux.org +L: device-drivers-devel@blackfin.uclinux.org W: http://wiki.analog.com/AD7142 S: Supported F: drivers/input/misc/ad714x.c AD7877 TOUCHSCREEN DRIVER M: Michael Hennerich -L: device-driver-devel@blackfin.uclinux.org +L: device-drivers-devel@blackfin.uclinux.org W: http://wiki.analog.com/AD7877 S: Supported F: drivers/input/touchscreen/ad7877.c AD7879 TOUCHSCREEN DRIVER (AD7879/AD7889) M: Michael Hennerich -L: device-driver-devel@blackfin.uclinux.org +L: device-drivers-devel@blackfin.uclinux.org W: http://wiki.analog.com/AD7879 S: Supported F: drivers/input/touchscreen/ad7879.c @@ -340,7 +337,7 @@ F: drivers/net/wireless/adm8211.* ADP5520 BACKLIGHT DRIVER WITH IO EXPANDER (ADP5520/ADP5501) M: Michael Hennerich -L: device-driver-devel@blackfin.uclinux.org +L: device-drivers-devel@blackfin.uclinux.org W: http://wiki.analog.com/ADP5520 S: Supported F: drivers/mfd/adp5520.c @@ -351,7 +348,7 @@ F: drivers/input/keyboard/adp5520-keys.c ADP5588 QWERTY KEYPAD AND IO EXPANDER DRIVER (ADP5588/ADP5587) M: Michael Hennerich -L: device-driver-devel@blackfin.uclinux.org +L: device-drivers-devel@blackfin.uclinux.org W: http://wiki.analog.com/ADP5588 S: Supported F: drivers/input/keyboard/adp5588-keys.c @@ -359,7 +356,7 @@ F: drivers/gpio/adp5588-gpio.c ADP8860 BACKLIGHT DRIVER (ADP8860/ADP8861/ADP8863) M: Michael Hennerich -L: device-driver-devel@blackfin.uclinux.org +L: device-drivers-devel@blackfin.uclinux.org W: http://wiki.analog.com/ADP8860 S: Supported F: drivers/video/backlight/adp8860_bl.c @@ -386,7 +383,7 @@ F: drivers/hwmon/adt7475.c ADXL34X THREE-AXIS DIGITAL ACCELEROMETER DRIVER (ADXL345/ADXL346) M: Michael Hennerich -L: device-driver-devel@blackfin.uclinux.org +L: device-drivers-devel@blackfin.uclinux.org W: http://wiki.analog.com/ADXL345 S: Supported F: drivers/input/misc/adxl34x.c @@ -404,8 +401,8 @@ S: Maintained F: sound/oss/aedsp16.c AFFS FILE SYSTEM -M: Roman Zippel -S: Maintained +L: linux-fsdevel@vger.kernel.org +S: Orphan F: Documentation/filesystems/affs.txt F: fs/affs/ @@ -482,6 +479,13 @@ F: drivers/tty/serial/altera_jtaguart.c F: include/linux/altera_uart.h F: include/linux/altera_jtaguart.h +AMD FAM15H PROCESSOR POWER MONITORING DRIVER +M: Andreas Herrmann +L: lm-sensors@lm-sensors.org +S: Maintained +F: Documentation/hwmon/fam15h_power +F: drivers/hwmon/fam15h_power.c + AMD GEODE CS5536 USB DEVICE CONTROLLER DRIVER M: Thomas Dahlmann L: linux-geode@lists.infradead.org (moderated for non-subscribers) @@ -525,7 +529,7 @@ S: Maintained F: drivers/infiniband/hw/amso1100/ ANALOG DEVICES INC ASOC CODEC DRIVERS -L: device-driver-devel@blackfin.uclinux.org +L: device-drivers-devel@blackfin.uclinux.org L: alsa-devel@alsa-project.org (moderated for non-subscribers) W: http://wiki.analog.com/ S: Supported @@ -547,10 +551,11 @@ S: Maintained F: sound/aoa/ APM DRIVER -L: linux-laptop@vger.kernel.org -S: Orphan +M: Jiri Kosina +S: Odd fixes F: arch/x86/kernel/apm_32.c F: include/linux/apm_bios.h +F: drivers/char/apm-emulation.c APPLE BCM5974 MULTITOUCH DRIVER M: Henrik Rydberg @@ -728,7 +733,7 @@ ARM/EZX SMARTPHONES (A780, A910, A1200, E680, ROKR E2 and ROKR E6) M: Daniel Ribeiro M: Stefan Schmidt M: Harald Welte -L: openezx-devel@lists.openezx.org (subscribers-only) +L: openezx-devel@lists.openezx.org (moderated for non-subscribers) W: http://www.openezx.org/ S: Maintained T: topgit git://git.openezx.org/openezx.git @@ -922,6 +927,8 @@ F: drivers/mmc/host/msm_sdcc.h F: drivers/tty/serial/msm_serial.h F: drivers/tty/serial/msm_serial.c F: drivers/platform/msm/ +F: drivers/*/pm8???-* +F: include/linux/mfd/pm8xxx/ T: git git://codeaurora.org/quic/kernel/davidb/linux-msm.git S: Maintained @@ -1031,12 +1038,13 @@ W: http://www.fluff.org/ben/linux/ S: Maintained F: arch/arm/mach-s3c64xx/ -ARM/S5P ARM ARCHITECTURES +ARM/S5P EXYNOS ARM ARCHITECTURES M: Kukjin Kim L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-samsung-soc@vger.kernel.org (moderated for non-subscribers) S: Maintained F: arch/arm/mach-s5p*/ +F: arch/arm/mach-exynos*/ ARM/SAMSUNG MOBILE MACHINE SUPPORT M: Kyungmin Park @@ -1230,13 +1238,6 @@ W: http://wireless.kernel.org/en/users/Drivers/ath9k S: Supported F: drivers/net/wireless/ath/ath9k/ -ATHEROS AR9170 WIRELESS DRIVER -M: Christian Lamparter -L: linux-wireless@vger.kernel.org -W: http://wireless.kernel.org/en/users/Drivers/ar9170 -S: Obsolete -F: drivers/net/wireless/ath/ar9170/ - CARL9170 LINUX COMMUNITY WIRELESS DRIVER M: Christian Lamparter L: linux-wireless@vger.kernel.org @@ -1738,7 +1739,7 @@ S: Supported F: drivers/net/enic/ CIRRUS LOGIC EP93XX ETHERNET DRIVER -M: Lennert Buytenhek +M: Hartley Sweeten L: netdev@vger.kernel.org S: Maintained F: drivers/net/arm/ep93xx_eth.c @@ -1888,7 +1889,6 @@ L: cpufreq@vger.kernel.org W: http://www.codemonkey.org.uk/projects/cpufreq/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/davej/cpufreq.git S: Maintained -F: arch/x86/kernel/cpu/cpufreq/ F: drivers/cpufreq/ F: include/linux/cpufreq.h @@ -2038,9 +2038,8 @@ F: net/ax25/ax25_timer.c F: net/ax25/sysctl_net_ax25.c DAVICOM FAST ETHERNET (DMFE) NETWORK DRIVER -M: Tobias Ringstrom L: netdev@vger.kernel.org -S: Maintained +S: Orphan F: Documentation/networking/dmfe.txt F: drivers/net/tulip/dmfe.c @@ -2174,6 +2173,8 @@ M: Dan Williams S: Supported F: drivers/dma/ F: include/linux/dma* +T: git git://git.kernel.org/pub/scm/linux/kernel/git/djbw/async_tx.git +T: git git://git.infradead.org/users/vkoul/slave-dma.git (slave-dma) DME1737 HARDWARE MONITOR DRIVER M: Juerg Haefliger @@ -2249,10 +2250,10 @@ F: drivers/gpu/drm/ F: include/drm/ INTEL DRM DRIVERS (excluding Poulsbo, Moorestown and derivative chipsets) -M: Chris Wilson +M: Keith Packard L: intel-gfx@lists.freedesktop.org (subscribers-only) L: dri-devel@lists.freedesktop.org -T: git git://git.kernel.org/pub/scm/linux/kernel/git/ickle/drm-intel.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/keithp/linux-2.6.git S: Supported F: drivers/gpu/drm/i915 F: include/drm/i915* @@ -2290,8 +2291,7 @@ F: drivers/scsi/eata_pio.* EBTABLES M: Bart De Schuymer -L: ebtables-user@lists.sourceforge.net -L: ebtables-devel@lists.sourceforge.net +L: netfilter-devel@vger.kernel.org W: http://ebtables.sourceforge.net/ S: Maintained F: include/linux/netfilter_bridge/ebt_*.h @@ -2300,7 +2300,7 @@ F: net/bridge/netfilter/ebt*.c ECRYPT FILE SYSTEM M: Tyler Hicks M: Dustin Kirkland -L: ecryptfs-devel@lists.launchpad.net +L: ecryptfs@vger.kernel.org W: https://launchpad.net/ecryptfs S: Supported F: Documentation/filesystems/ecryptfs.txt @@ -2580,6 +2580,13 @@ S: Maintained F: drivers/hwmon/f75375s.c F: include/linux/f75375s.h +FIREWIRE AUDIO DRIVERS +M: Clemens Ladisch +L: alsa-devel@alsa-project.org (moderated for non-subscribers) +T: git git://git.alsa-project.org/alsa-kernel.git +S: Maintained +F: sound/firewire/ + FIREWIRE SUBSYSTEM M: Stefan Richter L: linux1394-devel@lists.sourceforge.net @@ -2807,42 +2814,23 @@ GPIO SUBSYSTEM M: Grant Likely S: Maintained T: git git://git.secretlab.ca/git/linux-2.6.git -F: Documentation/gpio/gpio.txt +F: Documentation/gpio.txt F: drivers/gpio/ F: include/linux/gpio* +GRE DEMULTIPLEXER DRIVER +M: Dmitry Kozlov +L: netdev@vger.kernel.org +S: Maintained +F: net/ipv4/gre.c +F: include/net/gre.h + GRETH 10/100/1G Ethernet MAC device driver M: Kristoffer Glembo L: netdev@vger.kernel.org S: Maintained F: drivers/net/greth* -HARD DRIVE ACTIVE PROTECTION SYSTEM (HDAPS) DRIVER -M: Frank Seidel -L: platform-driver-x86@vger.kernel.org -W: http://www.kernel.org/pub/linux/kernel/people/fseidel/hdaps/ -S: Maintained -F: drivers/platform/x86/hdaps.c - -HWPOISON MEMORY FAILURE HANDLING -M: Andi Kleen -L: linux-mm@kvack.org -T: git git://git.kernel.org/pub/scm/linux/kernel/git/ak/linux-mce-2.6.git hwpoison -S: Maintained -F: mm/memory-failure.c -F: mm/hwpoison-inject.c - -HYPERVISOR VIRTUAL CONSOLE DRIVER -L: linuxppc-dev@lists.ozlabs.org -S: Odd Fixes -F: drivers/tty/hvc/ - -iSCSI BOOT FIRMWARE TABLE (iBFT) DRIVER -M: Peter Jones -M: Konrad Rzeszutek Wilk -S: Maintained -F: drivers/firmware/iscsi_ibft* - GSPCA FINEPIX SUBDRIVER M: Frank Zago L: linux-media@vger.kernel.org @@ -2893,6 +2881,26 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-2.6.git S: Maintained F: drivers/media/video/gspca/ +HARD DRIVE ACTIVE PROTECTION SYSTEM (HDAPS) DRIVER +M: Frank Seidel +L: platform-driver-x86@vger.kernel.org +W: http://www.kernel.org/pub/linux/kernel/people/fseidel/hdaps/ +S: Maintained +F: drivers/platform/x86/hdaps.c + +HWPOISON MEMORY FAILURE HANDLING +M: Andi Kleen +L: linux-mm@kvack.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/ak/linux-mce-2.6.git hwpoison +S: Maintained +F: mm/memory-failure.c +F: mm/hwpoison-inject.c + +HYPERVISOR VIRTUAL CONSOLE DRIVER +L: linuxppc-dev@lists.ozlabs.org +S: Odd Fixes +F: drivers/tty/hvc/ + HARDWARE MONITORING M: Jean Delvare M: Guenter Roeck @@ -2943,8 +2951,8 @@ F: drivers/block/cciss* F: include/linux/cciss_ioctl.h HFS FILESYSTEM -M: Roman Zippel -S: Maintained +L: linux-fsdevel@vger.kernel.org +S: Orphan F: Documentation/filesystems/hfs.txt F: fs/hfs/ @@ -3019,9 +3027,8 @@ S: Maintained F: drivers/net/wireless/hostap/ HP COMPAQ TC1100 TABLET WMI EXTRAS DRIVER -M: Carlos Corbacho L: platform-driver-x86@vger.kernel.org -S: Odd Fixes +S: Orphan F: drivers/platform/x86/tc1100-wmi.c HP100: Driver for HP 10/100 Mbit/s Voice Grade Network Adapter Series @@ -3360,6 +3367,12 @@ F: Documentation/wimax/README.i2400m F: drivers/net/wimax/i2400m/ F: include/linux/wimax/i2400m.h +INTEL WIRELESS 3945ABG/BG, 4965AGN (iwlegacy) +M: Stanislaw Gruszka +L: linux-wireless@vger.kernel.org +S: Supported +F: drivers/net/wireless/iwlegacy/ + INTEL WIRELESS WIFI LINK (iwlwifi) M: Wey-Yi Guy M: Intel Linux Wireless @@ -3476,6 +3489,12 @@ F: Documentation/isapnp.txt F: drivers/pnp/isapnp/ F: include/linux/isapnp.h +iSCSI BOOT FIRMWARE TABLE (iBFT) DRIVER +M: Peter Jones +M: Konrad Rzeszutek Wilk +S: Maintained +F: drivers/firmware/iscsi_ibft* + ISCSI M: Mike Christie L: open-iscsi@googlegroups.com @@ -3557,9 +3576,16 @@ M: Andrew Morton M: Jan Kara L: linux-ext4@vger.kernel.org S: Maintained -F: fs/jbd*/ -F: include/linux/ext*jbd*.h -F: include/linux/jbd*.h +F: fs/jbd/ +F: include/linux/ext3_jbd.h +F: include/linux/jbd.h + +JOURNALLING LAYER FOR BLOCK DEVICES (JBD2) +M: "Theodore Ts'o" +L: linux-ext4@vger.kernel.org +S: Maintained +F: fs/jbd2/ +F: include/linux/jbd2.h JSM Neo PCI based serial card M: Breno Leitao @@ -3582,10 +3608,9 @@ F: Documentation/hwmon/k8temp F: drivers/hwmon/k8temp.c KCONFIG -M: Roman Zippel +M: Michal Marek L: linux-kbuild@vger.kernel.org -Q: http://patchwork.kernel.org/project/linux-kbuild/list/ -S: Maintained +S: Odd Fixes F: Documentation/kbuild/kconfig-language.txt F: scripts/kconfig/ @@ -3696,7 +3721,7 @@ KEYS/KEYRINGS: M: David Howells L: keyrings@linux-nfs.org S: Maintained -F: Documentation/keys.txt +F: Documentation/security/keys.txt F: include/linux/key.h F: include/linux/key-type.h F: include/keys/ @@ -3708,7 +3733,7 @@ M: Mimi Zohar L: linux-security-module@vger.kernel.org L: keyrings@linux-nfs.org S: Supported -F: Documentation/keys-trusted-encrypted.txt +F: Documentation/security/keys-trusted-encrypted.txt F: include/keys/trusted-type.h F: security/keys/trusted.c F: security/keys/trusted.h @@ -3719,7 +3744,7 @@ M: David Safford L: linux-security-module@vger.kernel.org L: keyrings@linux-nfs.org S: Supported -F: Documentation/keys-trusted-encrypted.txt +F: Documentation/security/keys-trusted-encrypted.txt F: include/keys/encrypted-type.h F: security/keys/encrypted.c F: security/keys/encrypted.h @@ -3793,6 +3818,12 @@ S: Maintained F: drivers/leds/ F: include/linux/leds.h +LEGACY EEPROM DRIVER +M: Jean Delvare +S: Maintained +F: Documentation/misc-devices/eeprom +F: drivers/misc/eeprom/eeprom.c + LEGO USB Tower driver M: Juergen Stuber L: legousb-devel@lists.sourceforge.net @@ -3805,7 +3836,7 @@ M: Rusty Russell L: lguest@lists.ozlabs.org W: http://lguest.ozlabs.org/ S: Odd Fixes -F: Documentation/lguest/ +F: Documentation/virtual/lguest/ F: arch/x86/lguest/ F: drivers/lguest/ F: include/linux/lguest*.h @@ -3889,7 +3920,6 @@ F: drivers/*/*/*pasemi* LINUX SECURITY MODULE (LSM) FRAMEWORK M: Chris Wright L: linux-security-module@vger.kernel.org -T: git git://git.kernel.org/pub/scm/linux/kernel/git/chrisw/lsm-2.6.git S: Supported LIS3LV02D ACCELEROMETER DRIVER @@ -3992,7 +4022,6 @@ F: arch/m32r/ M68K ARCHITECTURE M: Geert Uytterhoeven -M: Roman Zippel L: linux-m68k@lists.linux-m68k.org W: http://www.linux-m68k.org/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/geert/linux-m68k.git @@ -4120,12 +4149,13 @@ F: include/linux/mm.h F: mm/ MEMORY RESOURCE CONTROLLER -M: Balbir Singh +M: Balbir Singh M: Daisuke Nishimura M: KAMEZAWA Hiroyuki L: linux-mm@kvack.org S: Maintained F: mm/memcontrol.c +F: mm/page_cgroup.c MEMORY TECHNOLOGY DEVICES (MTD) M: David Woodhouse @@ -4226,8 +4256,7 @@ F: drivers/mmc/ F: include/linux/mmc/ MULTIMEDIA CARD (MMC) ETC. OVER SPI -M: David Brownell -S: Odd Fixes +S: Orphan F: drivers/mmc/host/mmc_spi.c F: include/linux/spi/mmc_spi.h @@ -4245,7 +4274,7 @@ F: include/linux/isicom.h MUSB MULTIPOINT HIGH SPEED DUAL-ROLE CONTROLLER M: Felipe Balbi L: linux-usb@vger.kernel.org -T: git git://gitorious.org/usb/usb.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git S: Maintained F: drivers/usb/musb/ @@ -4262,6 +4291,13 @@ M: Tim Hockin S: Maintained F: drivers/net/natsemi.c +NATIVE INSTRUMENTS USB SOUND INTERFACE DRIVER +M: Daniel Mack +S: Maintained +L: alsa-devel@alsa-project.org +W: http://www.native-instruments.com +F: sound/usb/caiaq/ + NCP FILESYSTEM M: Petr Vandrovec S: Odd Fixes @@ -4377,6 +4413,7 @@ S: Maintained F: net/ipv4/ F: net/ipv6/ F: include/net/ip* +F: arch/x86/net/* NETWORKING [LABELED] (NetLabel, CIPSO, Labeled IPsec, SECMARK) M: Paul Moore @@ -4569,9 +4606,9 @@ F: drivers/media/video/omap3isp/* OMAP USB SUPPORT M: Felipe Balbi -M: David Brownell L: linux-usb@vger.kernel.org L: linux-omap@vger.kernel.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git S: Maintained F: drivers/usb/*/*omap* F: arch/arm/*omap*/usb* @@ -4857,7 +4894,7 @@ F: mm/percpu*.c F: arch/*/include/asm/percpu.h PER-TASK DELAY ACCOUNTING -M: Balbir Singh +M: Balbir Singh S: Maintained F: include/linux/delayacct.h F: kernel/delayacct.c @@ -4912,6 +4949,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/epip/linux-2.6-unicore32.gi F: drivers/input/serio/i8042-unicore32io.h F: drivers/i2c/busses/i2c-puv3.c F: drivers/video/fb-puv3.c +F: drivers/rtc/rtc-puv3.c PMC SIERRA MaxRAID DRIVER M: Anil Ravindranath @@ -4987,6 +5025,13 @@ F: Documentation/pps/ F: drivers/pps/ F: include/linux/pps*.h +PPTP DRIVER +M: Dmitry Kozlov +L: netdev@vger.kernel.org +S: Maintained +F: drivers/net/pptp.c +W: http://sourceforge.net/projects/accel-pptp + PREEMPTIBLE KERNEL M: Robert Love L: kpreempt-tech@lists.sourceforge.net @@ -5395,7 +5440,7 @@ F: drivers/media/video/*7146* F: include/media/*7146* SAMSUNG AUDIO (ASoC) DRIVERS -M: Jassi Brar +M: Jassi Brar L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Supported F: sound/soc/samsung @@ -5406,6 +5451,13 @@ L: linux-serial@vger.kernel.org S: Maintained F: drivers/tty/serial +SYNOPSYS DESIGNWARE DMAC DRIVER +M: Viresh Kumar +S: Maintained +F: include/linux/dw_dmac.h +F: drivers/dma/dw_dmac_regs.h +F: drivers/dma/dw_dmac.c + TIMEKEEPING, NTP M: John Stultz M: Thomas Gleixner @@ -5416,6 +5468,7 @@ F: include/linux/timex.h F: kernel/time/clocksource.c F: kernel/time/time*.c F: kernel/time/ntp.c +F: drivers/clocksource TLG2300 VIDEO4LINUX-2 DRIVER M: Huang Shijie @@ -5469,7 +5522,7 @@ F: drivers/scsi/sg.c F: include/scsi/sg.h SCSI SUBSYSTEM -M: "James E.J. Bottomley" +M: "James E.J. Bottomley" L: linux-scsi@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi-misc-2.6.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi-rc-fixes-2.6.git @@ -5567,10 +5620,11 @@ M: James Morris M: Eric Paris L: selinux@tycho.nsa.gov (subscribers-only, general discussion) W: http://selinuxproject.org -T: git git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/security-testing-2.6.git +T: git git://git.infradead.org/users/eparis/selinux.git S: Supported F: include/linux/selinux* F: security/selinux/ +F: scripts/selinux/ APPARMOR SECURITY MODULE M: John Johansen @@ -5596,9 +5650,9 @@ F: include/linux/ata.h F: include/linux/libata.h SERVER ENGINES 10Gbps iSCSI - BladeEngine 2 DRIVER -M: Jayamohan Kallickal +M: Jayamohan Kallickal L: linux-scsi@vger.kernel.org -W: http://www.serverengines.com +W: http://www.emulex.com S: Supported F: drivers/scsi/be2iscsi/ @@ -5816,6 +5870,13 @@ S: Maintained F: drivers/ssb/ F: include/linux/ssb/ +BROADCOM SPECIFIC AMBA DRIVER (BCMA) +M: Rafał Miłecki +L: linux-wireless@vger.kernel.org +S: Maintained +F: drivers/bcma/ +F: include/linux/bcma/ + SONY VAIO CONTROL DEVICE DRIVER M: Mattia Dongili L: platform-driver-x86@vger.kernel.org @@ -5845,7 +5906,7 @@ F: include/sound/ F: sound/ SOUND - SOC LAYER / DYNAMIC AUDIO POWER MANAGEMENT (ASoC) -M: Liam Girdwood +M: Liam Girdwood M: Mark Brown T: git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound-2.6.git L: alsa-devel@alsa-project.org (moderated for non-subscribers) @@ -5926,7 +5987,6 @@ F: Documentation/serial/specialix.txt F: drivers/staging/tty/specialix* SPI SUBSYSTEM -M: David Brownell M: Grant Likely L: spi-devel-general@lists.sourceforge.net Q: http://patchwork.kernel.org/project/spi-devel-general/list/ @@ -5954,7 +6014,7 @@ F: Documentation/filesystems/spufs.txt F: arch/powerpc/platforms/cell/spufs/ SQUASHFS FILE SYSTEM -M: Phillip Lougher +M: Phillip Lougher L: squashfs-devel@lists.sourceforge.net (subscribers-only) W: http://squashfs.org.uk S: Maintained @@ -6030,8 +6090,19 @@ F: Documentation/filesystems/sysv-fs.txt F: fs/sysv/ F: include/linux/sysv_fs.h +TARGET SUBSYSTEM +M: Nicholas A. Bellinger +L: linux-scsi@vger.kernel.org +L: http://groups.google.com/group/linux-iscsi-target-dev +W: http://www.linux-iscsi.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/nab/lio-core-2.6.git master +S: Supported +F: drivers/target/ +F: include/target/ +F: Documentation/target/ + TASKSTATS STATISTICS INTERFACE -M: Balbir Singh +M: Balbir Singh S: Maintained F: Documentation/accounting/taskstats* F: include/linux/taskstats* @@ -6096,7 +6167,7 @@ F: drivers/mmc/host/tifm_sd.c F: include/linux/tifm.h TI TWL4030 SERIES SOC CODEC DRIVER -M: Peter Ujfalusi +M: Peter Ujfalusi L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Maintained F: sound/soc/codecs/twl4030* @@ -6199,6 +6270,7 @@ TRIVIAL PATCHES M: Jiri Kosina T: git git://git.kernel.org/pub/scm/linux/kernel/git/jikos/trivial.git S: Maintained +K: ^Subject:.*(?i)trivial TTY LAYER M: Greg Kroah-Hartman @@ -6362,9 +6434,8 @@ S: Maintained F: drivers/usb/misc/rio500* USB EHCI DRIVER -M: David Brownell L: linux-usb@vger.kernel.org -S: Odd Fixes +S: Orphan F: Documentation/usb/ehci.txt F: drivers/usb/host/ehci* @@ -6378,9 +6449,10 @@ S: Maintained F: drivers/media/video/et61x251/ USB GADGET/PERIPHERAL SUBSYSTEM -M: David Brownell +M: Felipe Balbi L: linux-usb@vger.kernel.org W: http://www.linux-usb.org/gadget +T: git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git S: Maintained F: drivers/usb/gadget/ F: include/linux/usb/gadget* @@ -6390,7 +6462,7 @@ M: Jiri Kosina L: linux-usb@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/jikos/hid.git S: Maintained -F: Documentation/usb/hiddev.txt +F: Documentation/hid/hiddev.txt F: drivers/hid/usbhid/ USB ISP116X DRIVER @@ -6422,9 +6494,8 @@ S: Maintained F: sound/usb/midi.* USB OHCI DRIVER -M: David Brownell L: linux-usb@vger.kernel.org -S: Odd Fixes +S: Orphan F: Documentation/usb/ohci.txt F: drivers/usb/host/ohci* @@ -6554,7 +6625,7 @@ S: Maintained F: drivers/usb/host/uhci* USB "USBNET" DRIVER FRAMEWORK -M: David Brownell +M: Oliver Neukum L: netdev@vger.kernel.org W: http://www.linux-usb.org/usbnet S: Maintained @@ -6616,7 +6687,7 @@ L: user-mode-linux-devel@lists.sourceforge.net L: user-mode-linux-user@lists.sourceforge.net W: http://user-mode-linux.sourceforge.net S: Maintained -F: Documentation/uml/ +F: Documentation/virtual/uml/ F: arch/um/ F: fs/hostfs/ F: fs/hppfs/ @@ -6650,6 +6721,14 @@ S: Maintained F: Documentation/filesystems/vfat.txt F: fs/fat/ +VIDEOBUF2 FRAMEWORK +M: Pawel Osciak +M: Marek Szyprowski +L: linux-media@vger.kernel.org +S: Maintained +F: drivers/media/video/videobuf2-* +F: include/media/videobuf2-* + VIRTIO CONSOLE DRIVER M: Amit Shah L: virtualization@lists.linux-foundation.org @@ -6740,7 +6819,7 @@ F: drivers/scsi/vmw_pvscsi.c F: drivers/scsi/vmw_pvscsi.h VOLTAGE AND CURRENT REGULATOR FRAMEWORK -M: Liam Girdwood +M: Liam Girdwood M: Mark Brown W: http://opensource.wolfsonmicro.com/node/15 W: http://www.slimlogic.co.uk/?p=48 @@ -6762,6 +6841,13 @@ L: lm-sensors@lm-sensors.org S: Maintained F: drivers/hwmon/vt8231.c +VUB300 USB to SDIO/SD/MMC bridge chip +M: Tony Olech +L: linux-mmc@vger.kernel.org +L: linux-usb@vger.kernel.org +S: Supported +F: drivers/mmc/host/vub300.c + W1 DALLAS'S 1-WIRE BUS M: Evgeniy Polyakov S: Maintained @@ -6920,6 +7006,25 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/mjg59/platform-drivers-x86. S: Maintained F: drivers/platform/x86 +X86 MCE INFRASTRUCTURE +M: Tony Luck +M: Borislav Petkov +L: linux-edac@vger.kernel.org +S: Maintained +F: arch/x86/kernel/cpu/mcheck/* + +XEN HYPERVISOR INTERFACE +M: Jeremy Fitzhardinge +M: Konrad Rzeszutek Wilk +L: xen-devel@lists.xensource.com (moderated for non-subscribers) +L: virtualization@lists.linux-foundation.org +S: Supported +F: arch/x86/xen/ +F: drivers/*/xen-*front.c +F: drivers/xen/ +F: arch/x86/include/asm/xen/ +F: include/xen/ + XEN NETWORK BACKEND DRIVER M: Ian Campbell L: xen-devel@lists.xensource.com (moderated for non-subscribers) @@ -6941,18 +7046,6 @@ S: Supported F: arch/x86/xen/*swiotlb* F: drivers/xen/*swiotlb* -XEN HYPERVISOR INTERFACE -M: Jeremy Fitzhardinge -M: Konrad Rzeszutek Wilk -L: xen-devel@lists.xensource.com (moderated for non-subscribers) -L: virtualization@lists.linux-foundation.org -S: Supported -F: arch/x86/xen/ -F: drivers/*/xen-*front.c -F: drivers/xen/ -F: arch/x86/include/asm/xen/ -F: include/xen/ - XFS FILESYSTEM P: Silicon Graphics Inc M: Alex Elder @@ -7022,20 +7115,6 @@ M: "Maciej W. Rozycki" S: Maintained F: drivers/tty/serial/zs.* -GRE DEMULTIPLEXER DRIVER -M: Dmitry Kozlov -L: netdev@vger.kernel.org -S: Maintained -F: net/ipv4/gre.c -F: include/net/gre.h - -PPTP DRIVER -M: Dmitry Kozlov -L: netdev@vger.kernel.org -S: Maintained -F: drivers/net/pptp.c -W: http://sourceforge.net/projects/accel-pptp - THE REST M: Linus Torvalds L: linux-kernel@vger.kernel.org diff --git a/Makefile b/Makefile index b967b967572b..41330a06e4ec 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ -VERSION = 2 -PATCHLEVEL = 6 -SUBLEVEL = 39 +VERSION = 3 +PATCHLEVEL = 0 +SUBLEVEL = 0 EXTRAVERSION = -rc4 -NAME = Flesh-Eating Bats with Fangs +NAME = Sneaky Weasel # *DOCUMENTATION* # To see a list of typical targets execute "make help" @@ -103,7 +103,7 @@ ifeq ("$(origin O)", "command line") endif ifeq ("$(origin W)", "command line") - export KBUILD_ENABLE_EXTRA_GCC_CHECKS := 1 + export KBUILD_ENABLE_EXTRA_GCC_CHECKS := $(W) endif # That's our default target when none is given on the command line @@ -220,6 +220,14 @@ ifeq ($(ARCH),sh64) SRCARCH := sh endif +# Additional ARCH settings for tile +ifeq ($(ARCH),tilepro) + SRCARCH := tile +endif +ifeq ($(ARCH),tilegx) + SRCARCH := tile +endif + # Where to locate arch specific headers hdr-arch := $(SRCARCH) @@ -349,7 +357,8 @@ CFLAGS_GCOV = -fprofile-arcs -ftest-coverage # Use LINUXINCLUDE when you must reference the include/ directory. # Needed to be compatible with the O= option -LINUXINCLUDE := -I$(srctree)/arch/$(hdr-arch)/include -Iinclude \ +LINUXINCLUDE := -I$(srctree)/arch/$(hdr-arch)/include \ + -Iarch/$(hdr-arch)/include/generated -Iinclude \ $(if $(KBUILD_SRC), -I$(srctree)/include) \ -include include/generated/autoconf.h @@ -369,7 +378,7 @@ KBUILD_LDFLAGS_MODULE := -T $(srctree)/scripts/module-common.lds # Read KERNELRELEASE from include/config/kernel.release (if it exists) KERNELRELEASE = $(shell cat include/config/kernel.release 2> /dev/null) -KERNELVERSION = $(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) +KERNELVERSION = $(VERSION)$(if $(PATCHLEVEL),.$(PATCHLEVEL)$(if $(SUBLEVEL),.$(SUBLEVEL)))$(EXTRAVERSION) export VERSION PATCHLEVEL SUBLEVEL KERNELRELEASE KERNELVERSION export ARCH SRCARCH CONFIG_SHELL HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC @@ -382,6 +391,7 @@ export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE CFLAGS_GCOV export KBUILD_AFLAGS AFLAGS_KERNEL AFLAGS_MODULE export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL +export KBUILD_ARFLAGS # When compiling out-of-tree modules, put MODVERDIR in the module # tree rather than in the kernel tree. The kernel tree might @@ -416,6 +426,12 @@ ifneq ($(KBUILD_SRC),) $(srctree) $(objtree) $(VERSION) $(PATCHLEVEL) endif +# Support for using generic headers in asm-generic +PHONY += asm-generic +asm-generic: + $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.asm-generic \ + obj=arch/$(SRCARCH)/include/generated/asm + # To make sure we do not include .config for any of the *config targets # catch them early, and hand them over to scripts/kconfig/Makefile # It is allowed to specify more targets when calling make, including @@ -559,6 +575,10 @@ ifndef CONFIG_CC_STACKPROTECTOR KBUILD_CFLAGS += $(call cc-option, -fno-stack-protector) endif +# This warning generated too much noise in a regular build. +# Use make W=1 to enable this warning (see scripts/Makefile.build) +KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable) + ifdef CONFIG_FRAME_POINTER KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls else @@ -604,7 +624,7 @@ CHECKFLAGS += $(NOSTDINC_FLAGS) KBUILD_CFLAGS += $(call cc-option,-Wdeclaration-after-statement,) # disable pointer signed / unsigned warnings in gcc 4.0 -KBUILD_CFLAGS += $(call cc-option,-Wno-pointer-sign,) +KBUILD_CFLAGS += $(call cc-disable-warning, pointer-sign) # disable invalid "can't wrap" optimizations for signed / pointers KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow) @@ -612,6 +632,9 @@ KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow) # conserve stack if available KBUILD_CFLAGS += $(call cc-option,-fconserve-stack) +# use the deterministic mode of AR if available +KBUILD_ARFLAGS := $(call ar-option,D) + # check for 'asm goto' ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC)), y) KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO @@ -797,15 +820,17 @@ ifdef CONFIG_KALLSYMS # o The correct .tmp_kallsyms2.o is linked into the final vmlinux. # o Verify that the System.map from vmlinux matches the map from # .tmp_vmlinux2, just in case we did not generate kallsyms correctly. -# o If CONFIG_KALLSYMS_EXTRA_PASS is set, do an extra pass using +# o If 'make KALLSYMS_EXTRA_PASS=1" was used, do an extra pass using # .tmp_vmlinux3 and .tmp_kallsyms3.o. This is only meant as a # temporary bypass to allow the kernel to be built while the # maintainers work out what went wrong with kallsyms. -ifdef CONFIG_KALLSYMS_EXTRA_PASS -last_kallsyms := 3 -else last_kallsyms := 2 + +ifdef KALLSYMS_EXTRA_PASS +ifneq ($(KALLSYMS_EXTRA_PASS),0) +last_kallsyms := 3 +endif endif kallsyms.o := .tmp_kallsyms$(last_kallsyms).o @@ -816,7 +841,8 @@ define verify_kallsyms $(cmd_sysmap) .tmp_vmlinux$(last_kallsyms) .tmp_System.map $(Q)cmp -s System.map .tmp_System.map || \ (echo Inconsistent kallsyms data; \ - echo Try setting CONFIG_KALLSYMS_EXTRA_PASS; \ + echo This is a bug - please report about it; \ + echo Try "make KALLSYMS_EXTRA_PASS=1" as a workaround; \ rm .tmp_kallsyms* ; /bin/false ) endef @@ -947,7 +973,7 @@ ifneq ($(KBUILD_SRC),) endif # prepare2 creates a makefile if using a separate output directory -prepare2: prepare3 outputmakefile +prepare2: prepare3 outputmakefile asm-generic prepare1: prepare2 include/linux/version.h include/generated/utsrelease.h \ include/config/auto.conf @@ -979,7 +1005,7 @@ endef define filechk_version.h (echo \#define LINUX_VERSION_CODE $(shell \ - expr $(VERSION) \* 65536 + $(PATCHLEVEL) \* 256 + $(SUBLEVEL)); \ + expr $(VERSION) \* 65536 + 0$(PATCHLEVEL) \* 256 + 0$(SUBLEVEL)); \ echo '#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))';) endef @@ -991,7 +1017,8 @@ include/generated/utsrelease.h: include/config/kernel.release FORCE PHONY += headerdep headerdep: - $(Q)find include/ -name '*.h' | xargs --max-args 1 scripts/headerdep.pl + $(Q)find $(srctree)/include/ -name '*.h' | xargs --max-args 1 \ + $(srctree)/scripts/headerdep.pl -I$(srctree)/include # --------------------------------------------------------------------------- @@ -1021,7 +1048,7 @@ hdr-inst := -rR -f $(srctree)/scripts/Makefile.headersinst obj hdr-dst = $(if $(KBUILD_HEADERS), dst=include/asm-$(hdr-arch), dst=include/asm) PHONY += __headers -__headers: include/linux/version.h scripts_basic FORCE +__headers: include/linux/version.h scripts_basic asm-generic FORCE $(Q)$(MAKE) $(build)=scripts build_unifdef PHONY += headers_install_all @@ -1083,11 +1110,6 @@ modules_install: _modinst_ _modinst_post PHONY += _modinst_ _modinst_: - @if [ -z "`$(DEPMOD) -V 2>/dev/null | grep module-init-tools`" ]; then \ - echo "Warning: you may need to install module-init-tools"; \ - echo "See http://www.codemonkey.org.uk/docs/post-halloween-2.6.txt";\ - sleep 1; \ - fi @rm -rf $(MODLIB)/kernel @rm -f $(MODLIB)/source @mkdir -p $(MODLIB)/kernel @@ -1136,7 +1158,8 @@ CLEAN_FILES += vmlinux System.map \ .tmp_kallsyms* .tmp_version .tmp_vmlinux* .tmp_System.map # Directories & files removed with 'make mrproper' -MRPROPER_DIRS += include/config usr/include include/generated +MRPROPER_DIRS += include/config usr/include include/generated \ + arch/*/include/generated MRPROPER_FILES += .config .config.old .version .old_version \ include/linux/version.h \ Module.symvers tags TAGS cscope* GPATH GTAGS GRTAGS GSYMS @@ -1267,7 +1290,12 @@ help: @echo ' make O=dir [targets] Locate all output files in "dir", including .config' @echo ' make C=1 [targets] Check all c source with $$CHECK (sparse by default)' @echo ' make C=2 [targets] Force check of all c source with $$CHECK' - @echo ' make W=1 [targets] Enable extra gcc checks' + @echo ' make W=n [targets] Enable extra gcc checks, n=1,2,3 where' + @echo ' 1: warnings which may be relevant and do not occur too often' + @echo ' 2: warnings which occur quite often but may still be relevant' + @echo ' 3: more obscure warnings, can most likely be ignored' + @echo ' Multiple levels can be combined with W=12 or W=123' + @echo ' make RECORDMCOUNT_WARN=1 [targets] Warn about ignored mcount sections' @echo '' @echo 'Execute "make" or "make all" to build all targets marked with [*] ' @echo 'For further info see the ./README file' @@ -1290,6 +1318,7 @@ $(help-board-dirs): help-%: # Documentation targets # --------------------------------------------------------------------------- %docs: scripts_basic FORCE + $(Q)$(MAKE) $(build)=scripts build_docproc $(Q)$(MAKE) $(build)=Documentation/DocBook $@ else # KBUILD_EXTMOD @@ -1374,7 +1403,7 @@ endif # KBUILD_EXTMOD clean: $(clean-dirs) $(call cmd,rmdirs) $(call cmd,rmfiles) - @find $(or $(KBUILD_EXTMOD), .) $(RCS_FIND_IGNORE) \ + @find $(if $(KBUILD_EXTMOD), $(KBUILD_EXTMOD), .) $(RCS_FIND_IGNORE) \ \( -name '*.[oas]' -o -name '*.ko' -o -name '.*.cmd' \ -o -name '.*.d' -o -name '.*.tmp' -o -name '*.mod.c' \ -o -name '*.symtypes' -o -name 'modules.order' \ @@ -1392,13 +1421,15 @@ tags TAGS cscope gtags: FORCE # Scripts to check various things for consistency # --------------------------------------------------------------------------- +PHONY += includecheck versioncheck coccicheck namespacecheck export_report + includecheck: - find * $(RCS_FIND_IGNORE) \ + find $(srctree)/* $(RCS_FIND_IGNORE) \ -name '*.[hcS]' -type f -print | sort \ | xargs $(PERL) -w $(srctree)/scripts/checkincludes.pl versioncheck: - find * $(RCS_FIND_IGNORE) \ + find $(srctree)/* $(RCS_FIND_IGNORE) \ -name '*.[hcS]' -type f -print | sort \ | xargs $(PERL) -w $(srctree)/scripts/checkversion.pl @@ -1495,12 +1526,8 @@ quiet_cmd_rmfiles = $(if $(wildcard $(rm-files)),CLEAN $(wildcard $(rm-files)) # Run depmod only if we have System.map and depmod is executable quiet_cmd_depmod = DEPMOD $(KERNELRELEASE) - cmd_depmod = \ - if [ -r System.map -a -x $(DEPMOD) ]; then \ - $(DEPMOD) -ae -F System.map \ - $(if $(strip $(INSTALL_MOD_PATH)), -b $(INSTALL_MOD_PATH) ) \ - $(KERNELRELEASE); \ - fi + cmd_depmod = $(CONFIG_SHELL) $(srctree)/scripts/depmod.sh $(DEPMOD) \ + $(KERNELRELEASE) # Create temporary dir for module support files # clean it up only when building all modules diff --git a/arch/Kconfig b/arch/Kconfig index f78c2be4242b..26b0e2397a57 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -144,9 +144,6 @@ config HAVE_CLK config HAVE_DMA_API_DEBUG bool -config HAVE_DEFAULT_NO_SPIN_MUTEXES - bool - config HAVE_HW_BREAKPOINT bool depends on PERF_EVENTS @@ -178,4 +175,7 @@ config HAVE_ARCH_JUMP_LABEL config HAVE_ARCH_MUTEX_CPU_RELAX bool +config HAVE_RCU_TABLE_FREE + bool + source "kernel/gcov/Kconfig" diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 9808998cc073..60219bf94198 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -12,6 +12,7 @@ config ALPHA select GENERIC_IRQ_PROBE select AUTO_IRQ_AFFINITY if SMP select GENERIC_IRQ_SHOW + select ARCH_WANT_OPTIONAL_GPIOLIB help The Alpha is a 64-bit general-purpose processor designed and marketed by the Digital Equipment Corporation of blessed memory, @@ -40,10 +41,6 @@ config ARCH_HAS_ILOG2_U64 bool default n -config GENERIC_FIND_NEXT_BIT - bool - default y - config GENERIC_CALIBRATE_DELAY bool default y @@ -51,6 +48,9 @@ config GENERIC_CALIBRATE_DELAY config GENERIC_CMOS_UPDATE def_bool y +config GENERIC_GPIO + def_bool y + config ZONE_DMA bool default y diff --git a/arch/alpha/include/asm/gpio.h b/arch/alpha/include/asm/gpio.h new file mode 100644 index 000000000000..7dc6a6343c06 --- /dev/null +++ b/arch/alpha/include/asm/gpio.h @@ -0,0 +1,55 @@ +/* + * Generic GPIO API implementation for Alpha. + * + * A stright copy of that for PowerPC which was: + * + * Copyright (c) 2007-2008 MontaVista Software, Inc. + * + * Author: Anton Vorontsov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef _ASM_ALPHA_GPIO_H +#define _ASM_ALPHA_GPIO_H + +#include +#include + +#ifdef CONFIG_GPIOLIB + +/* + * We don't (yet) implement inlined/rapid versions for on-chip gpios. + * Just call gpiolib. + */ +static inline int gpio_get_value(unsigned int gpio) +{ + return __gpio_get_value(gpio); +} + +static inline void gpio_set_value(unsigned int gpio, int value) +{ + __gpio_set_value(gpio, value); +} + +static inline int gpio_cansleep(unsigned int gpio) +{ + return __gpio_cansleep(gpio); +} + +static inline int gpio_to_irq(unsigned int gpio) +{ + return __gpio_to_irq(gpio); +} + +static inline int irq_to_gpio(unsigned int irq) +{ + return -EINVAL; +} + +#endif /* CONFIG_GPIOLIB */ + +#endif /* _ASM_ALPHA_GPIO_H */ diff --git a/arch/alpha/include/asm/smp.h b/arch/alpha/include/asm/smp.h index 3f390e8cc0b3..c46e714aa3e0 100644 --- a/arch/alpha/include/asm/smp.h +++ b/arch/alpha/include/asm/smp.h @@ -39,8 +39,6 @@ struct cpuinfo_alpha { extern struct cpuinfo_alpha cpu_data[NR_CPUS]; -#define PROC_CHANGE_PENALTY 20 - #define hard_smp_processor_id() __hard_smp_processor_id() #define raw_smp_processor_id() (current_thread_info()->cpu) diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h index 058937bf5a77..4ac48a095f3a 100644 --- a/arch/alpha/include/asm/unistd.h +++ b/arch/alpha/include/asm/unistd.h @@ -452,10 +452,15 @@ #define __NR_fanotify_init 494 #define __NR_fanotify_mark 495 #define __NR_prlimit64 496 +#define __NR_name_to_handle_at 497 +#define __NR_open_by_handle_at 498 +#define __NR_clock_adjtime 499 +#define __NR_syncfs 500 +#define __NR_setns 501 #ifdef __KERNEL__ -#define NR_SYSCALLS 497 +#define NR_SYSCALLS 502 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 376f22130791..326f0a2d56e5 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -409,7 +409,7 @@ SYSCALL_DEFINE2(osf_getdomainname, char __user *, name, int, namelen) return -EFAULT; len = namelen; - if (namelen > 32) + if (len > 32) len = 32; down_read(&uts_sem); @@ -594,7 +594,7 @@ SYSCALL_DEFINE3(osf_sysinfo, int, command, char __user *, buf, long, count) down_read(&uts_sem); res = sysinfo_table[offset]; len = strlen(res)+1; - if (len > count) + if ((unsigned long)len > (unsigned long)count) len = count; if (copy_to_user(buf, res, len)) err = -EFAULT; @@ -649,7 +649,7 @@ SYSCALL_DEFINE5(osf_getsysinfo, unsigned long, op, void __user *, buffer, return 1; case GSI_GET_HWRPB: - if (nbytes < sizeof(*hwrpb)) + if (nbytes > sizeof(*hwrpb)) return -EINVAL; if (copy_to_user(buffer, hwrpb, nbytes) != 0) return -EFAULT; @@ -1008,6 +1008,7 @@ SYSCALL_DEFINE4(osf_wait4, pid_t, pid, int __user *, ustatus, int, options, { struct rusage r; long ret, err; + unsigned int status = 0; mm_segment_t old_fs; if (!ur) @@ -1016,13 +1017,15 @@ SYSCALL_DEFINE4(osf_wait4, pid_t, pid, int __user *, ustatus, int, options, old_fs = get_fs(); set_fs (KERNEL_DS); - ret = sys_wait4(pid, ustatus, options, (struct rusage __user *) &r); + ret = sys_wait4(pid, (unsigned int __user *) &status, options, + (struct rusage __user *) &r); set_fs (old_fs); if (!access_ok(VERIFY_WRITE, ur, sizeof(*ur))) return -EFAULT; err = 0; + err |= put_user(status, ustatus); err |= __put_user(r.ru_utime.tv_sec, &ur->ru_utime.tv_sec); err |= __put_user(r.ru_utime.tv_usec, &ur->ru_utime.tv_usec); err |= __put_user(r.ru_stime.tv_sec, &ur->ru_stime.tv_sec); diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 3ec35066f1dc..838eac128409 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -121,7 +121,7 @@ common_shutdown_1(void *generic_ptr) /* Wait for the secondaries to halt. */ set_cpu_present(boot_cpuid, false); set_cpu_possible(boot_cpuid, false); - while (cpus_weight(cpu_present_map)) + while (cpumask_weight(cpu_present_mask)) barrier(); #endif diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c index edbddcbd5bc6..cc0fd862cf26 100644 --- a/arch/alpha/kernel/setup.c +++ b/arch/alpha/kernel/setup.c @@ -1257,7 +1257,7 @@ show_cpuinfo(struct seq_file *f, void *slot) #ifdef CONFIG_SMP seq_printf(f, "cpus active\t\t: %u\n" "cpu active mask\t\t: %016lx\n", - num_online_cpus(), cpus_addr(cpu_possible_map)[0]); + num_online_cpus(), cpumask_bits(cpu_possible_mask)[0]); #endif show_cache_size (f, "L1 Icache", alpha_l1i_cacheshape); diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index 42aa078a5e4d..d739703608fc 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -451,7 +451,7 @@ setup_smp(void) } printk(KERN_INFO "SMP: %d CPUs probed -- cpu_present_map = %lx\n", - smp_num_probed, cpu_present_map.bits[0]); + smp_num_probed, cpumask_bits(cpu_present_mask)[0]); } /* @@ -585,8 +585,7 @@ handle_ipi(struct pt_regs *regs) switch (which) { case IPI_RESCHEDULE: - /* Reschedule callback. Everything to be done - is done by the interrupt return path. */ + scheduler_ipi(); break; case IPI_CALL_FUNC: @@ -630,8 +629,9 @@ smp_send_reschedule(int cpu) void smp_send_stop(void) { - cpumask_t to_whom = cpu_possible_map; - cpu_clear(smp_processor_id(), to_whom); + cpumask_t to_whom; + cpumask_copy(&to_whom, cpu_possible_mask); + cpumask_clear_cpu(smp_processor_id(), &to_whom); #ifdef DEBUG_IPI_MSG if (hard_smp_processor_id() != boot_cpu_id) printk(KERN_WARNING "smp_send_stop: Not on boot cpu.\n"); diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c index 5ac00fd4cd0c..f8856829c22a 100644 --- a/arch/alpha/kernel/sys_dp264.c +++ b/arch/alpha/kernel/sys_dp264.c @@ -140,7 +140,7 @@ cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity) for (cpu = 0; cpu < 4; cpu++) { unsigned long aff = cpu_irq_affinity[cpu]; - if (cpu_isset(cpu, affinity)) + if (cpumask_test_cpu(cpu, &affinity)) aff |= 1UL << irq; else aff &= ~(1UL << irq); diff --git a/arch/alpha/kernel/sys_titan.c b/arch/alpha/kernel/sys_titan.c index fea0e4620994..6994407e242a 100644 --- a/arch/alpha/kernel/sys_titan.c +++ b/arch/alpha/kernel/sys_titan.c @@ -65,10 +65,11 @@ titan_update_irq_hw(unsigned long mask) register int bcpu = boot_cpuid; #ifdef CONFIG_SMP - cpumask_t cpm = cpu_present_map; + cpumask_t cpm; volatile unsigned long *dim0, *dim1, *dim2, *dim3; unsigned long mask0, mask1, mask2, mask3, dummy; + cpumask_copy(&cpm, cpu_present_mask); mask &= ~isa_enable; mask0 = mask & titan_cpu_irq_affinity[0]; mask1 = mask & titan_cpu_irq_affinity[1]; @@ -84,10 +85,10 @@ titan_update_irq_hw(unsigned long mask) dim1 = &cchip->dim1.csr; dim2 = &cchip->dim2.csr; dim3 = &cchip->dim3.csr; - if (!cpu_isset(0, cpm)) dim0 = &dummy; - if (!cpu_isset(1, cpm)) dim1 = &dummy; - if (!cpu_isset(2, cpm)) dim2 = &dummy; - if (!cpu_isset(3, cpm)) dim3 = &dummy; + if (!cpumask_test_cpu(0, &cpm)) dim0 = &dummy; + if (!cpumask_test_cpu(1, &cpm)) dim1 = &dummy; + if (!cpumask_test_cpu(2, &cpm)) dim2 = &dummy; + if (!cpumask_test_cpu(3, &cpm)) dim3 = &dummy; *dim0 = mask0; *dim1 = mask1; @@ -137,7 +138,7 @@ titan_cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity) int cpu; for (cpu = 0; cpu < 4; cpu++) { - if (cpu_isset(cpu, affinity)) + if (cpumask_test_cpu(cpu, &affinity)) titan_cpu_irq_affinity[cpu] |= 1UL << irq; else titan_cpu_irq_affinity[cpu] &= ~(1UL << irq); diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S index a6a1de9db16f..b9c28f3f1956 100644 --- a/arch/alpha/kernel/systbls.S +++ b/arch/alpha/kernel/systbls.S @@ -498,23 +498,28 @@ sys_call_table: .quad sys_ni_syscall /* sys_timerfd */ .quad sys_eventfd .quad sys_recvmmsg - .quad sys_fallocate /* 480 */ + .quad sys_fallocate /* 480 */ .quad sys_timerfd_create .quad sys_timerfd_settime .quad sys_timerfd_gettime .quad sys_signalfd4 - .quad sys_eventfd2 /* 485 */ + .quad sys_eventfd2 /* 485 */ .quad sys_epoll_create1 .quad sys_dup3 .quad sys_pipe2 .quad sys_inotify_init1 - .quad sys_preadv /* 490 */ + .quad sys_preadv /* 490 */ .quad sys_pwritev .quad sys_rt_tgsigqueueinfo .quad sys_perf_event_open .quad sys_fanotify_init - .quad sys_fanotify_mark /* 495 */ + .quad sys_fanotify_mark /* 495 */ .quad sys_prlimit64 + .quad sys_name_to_handle_at + .quad sys_open_by_handle_at + .quad sys_clock_adjtime + .quad sys_syncfs /* 500 */ + .quad sys_setns .size sys_call_table, . - sys_call_table .type sys_call_table, @object diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c index 918e8e0b72ff..818e74ed45dc 100644 --- a/arch/alpha/kernel/time.c +++ b/arch/alpha/kernel/time.c @@ -375,8 +375,7 @@ static struct clocksource clocksource_rpcc = { static inline void register_rpcc_clocksource(long cycle_freq) { - clocksource_calc_mult_shift(&clocksource_rpcc, cycle_freq, 4); - clocksource_register(&clocksource_rpcc); + clocksource_register_hz(&clocksource_rpcc, cycle_freq); } #else /* !CONFIG_SMP */ static inline void register_rpcc_clocksource(long cycle_freq) diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S index 433be2a24f31..f937ad123852 100644 --- a/arch/alpha/kernel/vmlinux.lds.S +++ b/arch/alpha/kernel/vmlinux.lds.S @@ -39,13 +39,14 @@ SECTIONS __init_begin = ALIGN(PAGE_SIZE); INIT_TEXT_SECTION(PAGE_SIZE) INIT_DATA_SECTION(16) - PERCPU(L1_CACHE_BYTES, PAGE_SIZE) + PERCPU_SECTION(L1_CACHE_BYTES) /* Align to THREAD_SIZE rather than PAGE_SIZE here so any padding page needed for the THREAD_SIZE aligned init_task gets freed after init */ . = ALIGN(THREAD_SIZE); __init_end = .; /* Freed after init ends here */ + _sdata = .; /* Start of rw data section */ _data = .; RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c index 86425ab53bf5..69d0c5761e2f 100644 --- a/arch/alpha/mm/init.c +++ b/arch/alpha/mm/init.c @@ -32,8 +32,6 @@ #include #include -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); - extern void die_if_kernel(char *,struct pt_regs *,long); static struct pcb_struct original_pcb; diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c index 7b2c56d8f930..3973ae395772 100644 --- a/arch/alpha/mm/numa.c +++ b/arch/alpha/mm/numa.c @@ -313,6 +313,7 @@ void __init paging_init(void) zones_size[ZONE_DMA] = dma_local_pfn; zones_size[ZONE_NORMAL] = (end_pfn - start_pfn) - dma_local_pfn; } + node_set_state(nid, N_NORMAL_MEMORY); free_area_init_node(nid, zones_size, start_pfn, NULL); } diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 377a7a595b08..9adc278a22ab 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -197,15 +197,21 @@ config ARM_PATCH_PHYS_VIRT depends on !XIP_KERNEL && MMU depends on !ARCH_REALVIEW || !SPARSEMEM help - Patch phys-to-virt translation functions at runtime according to - the position of the kernel in system memory. + Patch phys-to-virt and virt-to-phys translation functions at + boot and module load time according to the position of the + kernel in system memory. - This can only be used with non-XIP with MMU kernels where - the base of physical memory is at a 16MB boundary. + This can only be used with non-XIP MMU kernels where the base + of physical memory is at a 16MB boundary, or theoretically 64K + for the MSM machine class. config ARM_PATCH_PHYS_VIRT_16BIT def_bool y depends on ARM_PATCH_PHYS_VIRT && ARCH_MSM + help + This option extends the physical to virtual translation patching + to allow physical memory down to a theoretical minimum of 64K + boundaries. source "init/Kconfig" @@ -288,6 +294,8 @@ config ARCH_AT91 bool "Atmel AT91" select ARCH_REQUIRE_GPIOLIB select HAVE_CLK + select CLKDEV_LOOKUP + select ARM_PATCH_PHYS_VIRT if MMU help This enables support for systems based on the Atmel AT91RM9200, AT91SAM9 and AT91CAP9 processors. @@ -297,6 +305,7 @@ config ARCH_BCMRING depends on MMU select CPU_V6 select ARM_AMBA + select ARM_TIMER_SP804 select CLKDEV_LOOKUP select GENERIC_CLOCKEVENTS select ARCH_WANT_OPTIONAL_GPIOLIB @@ -366,6 +375,7 @@ config ARCH_MXC select GENERIC_CLOCKEVENTS select ARCH_REQUIRE_GPIOLIB select CLKDEV_LOOKUP + select CLKSRC_MMIO select HAVE_SCHED_CLOCK help Support for Freescale MXC/iMX-based family of processors @@ -375,21 +385,13 @@ config ARCH_MXS select GENERIC_CLOCKEVENTS select ARCH_REQUIRE_GPIOLIB select CLKDEV_LOOKUP + select CLKSRC_MMIO help Support for Freescale MXS-based family of processors -config ARCH_STMP3XXX - bool "Freescale STMP3xxx" - select CPU_ARM926T - select CLKDEV_LOOKUP - select ARCH_REQUIRE_GPIOLIB - select GENERIC_CLOCKEVENTS - select USB_ARCH_HAS_EHCI - help - Support for systems based on the Freescale 3xxx CPUs. - config ARCH_NETX bool "Hilscher NetX based" + select CLKSRC_MMIO select CPU_ARM926T select ARM_VIC select GENERIC_CLOCKEVENTS @@ -457,6 +459,7 @@ config ARCH_IXP2000 config ARCH_IXP4XX bool "IXP4xx-based" depends on MMU + select CLKSRC_MMIO select CPU_XSCALE select GENERIC_GPIO select GENERIC_CLOCKEVENTS @@ -468,7 +471,7 @@ config ARCH_IXP4XX config ARCH_DOVE bool "Marvell Dove" - select CPU_V6K + select CPU_V7 select PCI select ARCH_REQUIRE_GPIOLIB select GENERIC_CLOCKEVENTS @@ -497,6 +500,7 @@ config ARCH_LOKI config ARCH_LPC32XX bool "NXP LPC32XX" + select CLKSRC_MMIO select CPU_ARM926T select ARCH_REQUIRE_GPIOLIB select HAVE_IDE @@ -554,23 +558,12 @@ config ARCH_KS8695 Support for Micrel/Kendin KS8695 "Centaur" (ARM922T) based System-on-Chip devices. -config ARCH_NS9XXX - bool "NetSilicon NS9xxx" - select CPU_ARM926T - select GENERIC_GPIO - select GENERIC_CLOCKEVENTS - select HAVE_CLK - help - Say Y here if you intend to run this kernel on a NetSilicon NS9xxx - System. - - - config ARCH_W90X900 bool "Nuvoton W90X900 CPU" select CPU_ARM926T select ARCH_REQUIRE_GPIOLIB select CLKDEV_LOOKUP + select CLKSRC_MMIO select GENERIC_CLOCKEVENTS help Support for Nuvoton (Winbond logic dept.) ARM9 processor, @@ -592,6 +585,7 @@ config ARCH_NUC93X config ARCH_TEGRA bool "NVIDIA Tegra" select CLKDEV_LOOKUP + select CLKSRC_MMIO select GENERIC_TIME select GENERIC_CLOCKEVENTS select GENERIC_GPIO @@ -617,6 +611,7 @@ config ARCH_PXA select ARCH_MTD_XIP select ARCH_HAS_CPUFREQ select CLKDEV_LOOKUP + select CLKSRC_MMIO select ARCH_REQUIRE_GPIOLIB select GENERIC_CLOCKEVENTS select HAVE_SCHED_CLOCK @@ -667,6 +662,7 @@ config ARCH_RPC config ARCH_SA1100 bool "SA1100-based" + select CLKSRC_MMIO select CPU_SA1100 select ISA select ARCH_SPARSEMEM_ENABLE @@ -736,16 +732,6 @@ config ARCH_S5P64X0 Samsung S5P64X0 CPU based systems, such as the Samsung SMDK6440, SMDK6450. -config ARCH_S5P6442 - bool "Samsung S5P6442" - select CPU_V6 - select GENERIC_GPIO - select HAVE_CLK - select ARCH_USES_GETTIMEOFFSET - select HAVE_S3C2410_WATCHDOG if WATCHDOG - help - Samsung S5P6442 CPU based systems - config ARCH_S5PC100 bool "Samsung S5PC100" select GENERIC_GPIO @@ -803,6 +789,7 @@ config ARCH_SHARK config ARCH_TCC_926 bool "Telechips TCC ARM926-based systems" + select CLKSRC_MMIO select CPU_ARM926T select HAVE_CLK select CLKDEV_LOOKUP @@ -813,6 +800,7 @@ config ARCH_TCC_926 config ARCH_U300 bool "ST-Ericsson U300 Series" depends on MMU + select CLKSRC_MMIO select CPU_ARM926T select HAVE_SCHED_CLOCK select HAVE_TCM @@ -854,6 +842,7 @@ config ARCH_DAVINCI select HAVE_IDE select CLKDEV_LOOKUP select GENERIC_ALLOCATOR + select GENERIC_IRQ_CHIP select ARCH_HAS_HOLES_MEMORYMODEL help Support for TI's DaVinci platform. @@ -874,6 +863,7 @@ config PLAT_SPEAR select ARM_AMBA select ARCH_REQUIRE_GPIOLIB select CLKDEV_LOOKUP + select CLKSRC_MMIO select GENERIC_CLOCKEVENTS select HAVE_CLK help @@ -951,8 +941,6 @@ source "arch/arm/mach-netx/Kconfig" source "arch/arm/mach-nomadik/Kconfig" source "arch/arm/plat-nomadik/Kconfig" -source "arch/arm/mach-ns9xxx/Kconfig" - source "arch/arm/mach-nuc93x/Kconfig" source "arch/arm/plat-omap/Kconfig" @@ -995,8 +983,6 @@ endif source "arch/arm/mach-s5p64x0/Kconfig" -source "arch/arm/mach-s5p6442/Kconfig" - source "arch/arm/mach-s5pc100/Kconfig" source "arch/arm/mach-s5pv210/Kconfig" @@ -1005,8 +991,6 @@ source "arch/arm/mach-exynos4/Kconfig" source "arch/arm/mach-shmobile/Kconfig" -source "arch/arm/plat-stmp3xxx/Kconfig" - source "arch/arm/mach-tegra/Kconfig" source "arch/arm/mach-u300/Kconfig" @@ -1033,6 +1017,8 @@ config PLAT_IOP config PLAT_ORION bool + select CLKSRC_MMIO + select GENERIC_IRQ_CHIP select HAVE_SCHED_CLOCK config PLAT_PXA @@ -1043,6 +1029,7 @@ config PLAT_VERSATILE config ARM_TIMER_SP804 bool + select CLKSRC_MMIO source arch/arm/mm/Kconfig @@ -1318,8 +1305,7 @@ menu "Kernel Features" source "kernel/time/Kconfig" config SMP - bool "Symmetric Multi-Processing (EXPERIMENTAL)" - depends on EXPERIMENTAL + bool "Symmetric Multi-Processing" depends on CPU_V6K || CPU_V7 depends on GENERIC_CLOCKEVENTS depends on REALVIEW_EB_ARM11MP || REALVIEW_EB_A9MP || \ @@ -1403,7 +1389,6 @@ config NR_CPUS config HOTPLUG_CPU bool "Support for hot-pluggable CPUs (EXPERIMENTAL)" depends on SMP && HOTPLUG && EXPERIMENTAL - depends on !ARCH_MSM help Say Y here to experiment with turning CPUs off and on. CPUs can be controlled through /sys/devices/system/cpu. @@ -1424,7 +1409,7 @@ source kernel/Kconfig.preempt config HZ int default 200 if ARCH_EBSA110 || ARCH_S3C2410 || ARCH_S5P64X0 || \ - ARCH_S5P6442 || ARCH_S5PV210 || ARCH_EXYNOS4 + ARCH_S5PV210 || ARCH_EXYNOS4 default OMAP_32K_TIMER_HZ if ARCH_OMAP && OMAP_32K_TIMER default AT91_TIMER_HZ if ARCH_AT91 default SHMOBILE_TIMER_HZ if ARCH_SHMOBILE @@ -1520,9 +1505,12 @@ config ARCH_SPARSEMEM_DEFAULT config ARCH_SELECT_MEMORY_MODEL def_bool ARCH_SPARSEMEM_ENABLE +config HAVE_ARCH_PFN_VALID + def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM + config HIGHMEM - bool "High Memory Support (EXPERIMENTAL)" - depends on MMU && EXPERIMENTAL + bool "High Memory Support" + depends on MMU help The address space of ARM processors is only 4 Gigabytes large and it has to accommodate user address space, kernel address @@ -1687,6 +1675,13 @@ endmenu menu "Boot options" +config USE_OF + bool "Flattened Device Tree support" + select OF + select OF_EARLY_FLATTREE + help + Include support for flattened device tree machine descriptions. + # Compressed boot loader in ROM. Yes, we really want to ask about # TEXT and BSS so we preserve their values in the config files. config ZBOOT_ROM_TEXT @@ -1742,16 +1737,31 @@ config CMDLINE time by entering them here. As a minimum, you should specify the memory size and the root device (e.g., mem=64M root=/dev/nfs). +choice + prompt "Kernel command line type" if CMDLINE != "" + default CMDLINE_FROM_BOOTLOADER + +config CMDLINE_FROM_BOOTLOADER + bool "Use bootloader kernel arguments if available" + help + Uses the command-line options passed by the boot loader. If + the boot loader doesn't provide any, the default kernel command + string provided in CMDLINE will be used. + +config CMDLINE_EXTEND + bool "Extend bootloader kernel arguments" + help + The command-line arguments provided by the boot loader will be + appended to the default kernel command string. + config CMDLINE_FORCE bool "Always use the default kernel command string" - depends on CMDLINE != "" help Always use the default kernel command string, even if the boot loader passes other arguments to the kernel. This is useful if you cannot or don't want to change the command-line options your boot loader passes to the kernel. - - If unsure, say N. +endchoice config XIP_KERNEL bool "Kernel Execute-In-Place from ROM" @@ -2010,7 +2020,7 @@ menu "Power management options" source "kernel/power/Kconfig" config ARCH_SUSPEND_POSSIBLE - depends on !ARCH_S5P64X0 && !ARCH_S5P6442 + depends on !ARCH_S5P64X0 && !ARCH_S5PC100 depends on CPU_ARM920T || CPU_ARM926T || CPU_SA1100 || \ CPU_V6 || CPU_V6K || CPU_V7 || CPU_XSC3 || CPU_XSCALE def_bool y diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index 03d01d783e3b..81cbe40c159c 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -63,13 +63,6 @@ config DEBUG_USER 8 - SIGSEGV faults 16 - SIGBUS faults -config DEBUG_STACK_USAGE - bool "Enable stack utilization instrumentation" - depends on DEBUG_KERNEL - help - Enables the display of the minimum amount of free stack which each - task has ever had available in the sysrq-T output. - # These options are only for real kernel hackers who want to get their hands dirty. config DEBUG_LL bool "Kernel low-level debugging functions" diff --git a/arch/arm/Makefile b/arch/arm/Makefile index c7d321a3d95d..f5b2b390c8f2 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -158,13 +158,11 @@ machine-$(CONFIG_ARCH_MV78XX0) := mv78xx0 machine-$(CONFIG_ARCH_MX1) := imx machine-$(CONFIG_ARCH_MX2) := imx machine-$(CONFIG_ARCH_MX25) := imx -machine-$(CONFIG_ARCH_MX3) := mx3 +machine-$(CONFIG_ARCH_MX3) := imx machine-$(CONFIG_ARCH_MX5) := mx5 -machine-$(CONFIG_ARCH_MXC91231) := mxc91231 machine-$(CONFIG_ARCH_MXS) := mxs machine-$(CONFIG_ARCH_NETX) := netx machine-$(CONFIG_ARCH_NOMADIK) := nomadik -machine-$(CONFIG_ARCH_NS9XXX) := ns9xxx machine-$(CONFIG_ARCH_OMAP1) := omap1 machine-$(CONFIG_ARCH_OMAP2) := omap2 machine-$(CONFIG_ARCH_OMAP3) := omap2 @@ -178,15 +176,12 @@ machine-$(CONFIG_ARCH_S3C2410) := s3c2410 s3c2400 s3c2412 s3c2416 s3c2440 s3c24 machine-$(CONFIG_ARCH_S3C24A0) := s3c24a0 machine-$(CONFIG_ARCH_S3C64XX) := s3c64xx machine-$(CONFIG_ARCH_S5P64X0) := s5p64x0 -machine-$(CONFIG_ARCH_S5P6442) := s5p6442 machine-$(CONFIG_ARCH_S5PC100) := s5pc100 machine-$(CONFIG_ARCH_S5PV210) := s5pv210 machine-$(CONFIG_ARCH_EXYNOS4) := exynos4 machine-$(CONFIG_ARCH_SA1100) := sa1100 machine-$(CONFIG_ARCH_SHARK) := shark machine-$(CONFIG_ARCH_SHMOBILE) := shmobile -machine-$(CONFIG_ARCH_STMP378X) := stmp378x -machine-$(CONFIG_ARCH_STMP37XX) := stmp37xx machine-$(CONFIG_ARCH_TCC8K) := tcc8k machine-$(CONFIG_ARCH_TEGRA) := tegra machine-$(CONFIG_ARCH_U300) := u300 @@ -207,7 +202,6 @@ machine-$(CONFIG_MACH_SPEAR600) := spear6xx plat-$(CONFIG_ARCH_MXC) := mxc plat-$(CONFIG_ARCH_OMAP) := omap plat-$(CONFIG_ARCH_S3C64XX) := samsung -plat-$(CONFIG_ARCH_STMP3XXX) := stmp3xxx plat-$(CONFIG_ARCH_TCC_926) := tcc plat-$(CONFIG_PLAT_IOP) := iop plat-$(CONFIG_PLAT_NOMADIK) := nomadik diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 8ebbb511c783..23aad0722303 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -74,7 +74,7 @@ ZTEXTADDR := $(CONFIG_ZBOOT_ROM_TEXT) ZBSSADDR := $(CONFIG_ZBOOT_ROM_BSS) else ZTEXTADDR := 0 -ZBSSADDR := ALIGN(4) +ZBSSADDR := ALIGN(8) endif SEDFLAGS = s/TEXT_START/$(ZTEXTADDR)/;s/BSS_START/$(ZBSSADDR)/ @@ -98,8 +98,6 @@ endif ccflags-y := -fpic -fno-builtin asflags-y := -Wa,-march=all -# Provide size of uncompressed kernel to the decompressor via a linker symbol. -LDFLAGS_vmlinux = --defsym _image_size=$(shell stat -c "%s" $(obj)/../Image) # Supply ZRELADDR to the decompressor via a linker symbol. ifneq ($(CONFIG_AUTO_ZRELADDR),y) LDFLAGS_vmlinux += --defsym zreladdr=$(ZRELADDR) @@ -122,10 +120,23 @@ lib1funcs = $(obj)/lib1funcs.o $(obj)/lib1funcs.S: $(srctree)/arch/$(SRCARCH)/lib/lib1funcs.S FORCE $(call cmd,shipped) +# We need to prevent any GOTOFF relocs being used with references +# to symbols in the .bss section since we cannot relocate them +# independently from the rest at run time. This can be achieved by +# ensuring that no private .bss symbols exist, as global symbols +# always have a GOT entry which is what we need. +# The .data section is already discarded by the linker script so no need +# to bother about it here. +check_for_bad_syms = \ +bad_syms=$$($(CROSS_COMPILE)nm $@ | sed -n 's/^.\{8\} [bc] \(.*\)/\1/p') && \ +[ -z "$$bad_syms" ] || \ + ( echo "following symbols must have non local/private scope:" >&2; \ + echo "$$bad_syms" >&2; rm -f $@; false ) + $(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/$(HEAD) $(obj)/piggy.$(suffix_y).o \ $(addprefix $(obj)/, $(OBJS)) $(lib1funcs) FORCE $(call if_changed,ld) - @: + @$(check_for_bad_syms) $(obj)/piggy.$(suffix_y): $(obj)/../Image FORCE $(call if_changed,$(suffix_y)) diff --git a/arch/arm/boot/compressed/decompress.c b/arch/arm/boot/compressed/decompress.c index 4c72a97bc3e1..07be5a2f8302 100644 --- a/arch/arm/boot/compressed/decompress.c +++ b/arch/arm/boot/compressed/decompress.c @@ -44,7 +44,7 @@ extern void error(char *); #include "../../../../lib/decompress_unlzma.c" #endif -void do_decompress(u8 *input, int len, u8 *output, void (*error)(char *x)) +int do_decompress(u8 *input, int len, u8 *output, void (*error)(char *x)) { - decompress(input, len, NULL, NULL, output, NULL, error); + return decompress(input, len, NULL, NULL, output, NULL, error); } diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index adf583cd0c35..942fad97e447 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -179,16 +179,29 @@ not_angel: bl cache_on restart: adr r0, LC0 - ldmia r0, {r1, r2, r3, r5, r6, r9, r11, r12} - ldr sp, [r0, #32] + ldmia r0, {r1, r2, r3, r6, r10, r11, r12} + ldr sp, [r0, #28] /* * We might be running at a different address. We need * to fix up various pointers. */ sub r0, r0, r1 @ calculate the delta offset - add r5, r5, r0 @ _start add r6, r6, r0 @ _edata + add r10, r10, r0 @ inflated kernel size location + + /* + * The kernel build system appends the size of the + * decompressed kernel at the end of the compressed data + * in little-endian form. + */ + ldrb r9, [r10, #0] + ldrb lr, [r10, #1] + orr r9, r9, lr, lsl #8 + ldrb lr, [r10, #2] + ldrb r10, [r10, #3] + orr r9, r9, lr, lsl #16 + orr r9, r9, r10, lsl #24 #ifndef CONFIG_ZBOOT_ROM /* malloc space is above the relocated stack (64k max) */ @@ -206,31 +219,40 @@ restart: adr r0, LC0 /* * Check to see if we will overwrite ourselves. * r4 = final kernel address - * r5 = start of this image * r9 = size of decompressed image * r10 = end of this image, including bss/stack/malloc space if non XIP * We basically want: - * r4 >= r10 -> OK - * r4 + image length <= r5 -> OK + * r4 - 16k page directory >= r10 -> OK + * r4 + image length <= current position (pc) -> OK */ + add r10, r10, #16384 cmp r4, r10 bhs wont_overwrite add r10, r4, r9 - cmp r10, r5 + ARM( cmp r10, pc ) + THUMB( mov lr, pc ) + THUMB( cmp r10, lr ) bls wont_overwrite /* * Relocate ourselves past the end of the decompressed kernel. - * r5 = start of this image * r6 = _edata * r10 = end of the decompressed kernel * Because we always copy ahead, we need to do it from the end and go * backward in case the source and destination overlap. */ - /* Round up to next 256-byte boundary. */ - add r10, r10, #256 + /* + * Bump to the next 256-byte boundary with the size of + * the relocation code added. This avoids overwriting + * ourself when the offset is small. + */ + add r10, r10, #((reloc_code_end - restart + 256) & ~255) bic r10, r10, #255 + /* Get start of code we want to copy and align it down. */ + adr r5, restart + bic r5, r5, #31 + sub r9, r6, r5 @ size to copy add r9, r9, #31 @ rounded up to a multiple bic r9, r9, #31 @ ... of 32 bytes @@ -245,6 +267,11 @@ restart: adr r0, LC0 /* Preserve offset to relocated code. */ sub r6, r9, r6 +#ifndef CONFIG_ZBOOT_ROM + /* cache_clean_flush may use the stack, so relocate it */ + add sp, sp, r6 +#endif + bl cache_clean_flush adr r0, BSYM(restart) @@ -333,12 +360,11 @@ not_relocated: mov r0, #0 LC0: .word LC0 @ r1 .word __bss_start @ r2 .word _end @ r3 - .word _start @ r5 .word _edata @ r6 - .word _image_size @ r9 + .word input_data_end - 4 @ r10 (inflated size location) .word _got_start @ r11 .word _got_end @ ip - .word user_stack_end @ sp + .word .L_user_stack_end @ sp .size LC0, . - LC0 #ifdef CONFIG_ARCH_RPC @@ -447,7 +473,11 @@ __setup_mmu: sub r3, r4, #16384 @ Page directory size orr r1, r1, #3 << 10 add r2, r3, #16384 1: cmp r1, r9 @ if virt > start of RAM +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + orrhs r1, r1, #0x08 @ set cacheable +#else orrhs r1, r1, #0x0c @ set cacheable, bufferable +#endif cmp r1, r10 @ if virt > end of RAM bichs r1, r1, #0x0c @ clear cacheable, bufferable str r1, [r0], #4 @ 1:1 mapping @@ -472,6 +502,12 @@ __setup_mmu: sub r3, r4, #16384 @ Page directory size mov pc, lr ENDPROC(__setup_mmu) +__arm926ejs_mmu_cache_on: +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mov r0, #4 @ put dcache in WT mode + mcr p15, 7, r0, c15, c0, 0 +#endif + __armv4_mmu_cache_on: mov r12, lr #ifdef CONFIG_MMU @@ -653,6 +689,12 @@ proc_types: W(b) __armv4_mpu_cache_off W(b) __armv4_mpu_cache_flush + .word 0x41069260 @ ARM926EJ-S (v5TEJ) + .word 0xff0ffff0 + W(b) __arm926ejs_mmu_cache_on + W(b) __armv4_mmu_cache_off + W(b) __armv5tej_mmu_cache_flush + .word 0x00007000 @ ARM7 IDs .word 0x0000f000 mov pc, lr @@ -735,12 +777,6 @@ proc_types: W(b) __armv4_mmu_cache_off W(b) __armv6_mmu_cache_flush - .word 0x560f5810 @ Marvell PJ4 ARMv6 - .word 0xff0ffff0 - W(b) __armv4_mmu_cache_on - W(b) __armv4_mmu_cache_off - W(b) __armv6_mmu_cache_flush - .word 0x000f0000 @ new CPU Id .word 0x000f0000 W(b) __armv7_mmu_cache_on @@ -1062,8 +1098,9 @@ memdump: mov r12, r0 #endif .ltorg +reloc_code_end: .align .section ".stack", "aw", %nobits -user_stack: .space 4096 -user_stack_end: +.L_user_stack: .space 4096 +.L_user_stack_end: diff --git a/arch/arm/boot/compressed/misc.c b/arch/arm/boot/compressed/misc.c index 2df38263124c..832d37236c59 100644 --- a/arch/arm/boot/compressed/misc.c +++ b/arch/arm/boot/compressed/misc.c @@ -26,8 +26,6 @@ unsigned int __machine_arch_type; #include #include -#include - static void putstr(const char *ptr); extern void error(char *x); @@ -139,13 +137,12 @@ void *memcpy(void *__dest, __const void *__src, size_t __n) } /* - * gzip delarations + * gzip declarations */ extern char input_data[]; extern char input_data_end[]; unsigned char *output_data; -unsigned long output_ptr; unsigned long free_mem_ptr; unsigned long free_mem_end_ptr; @@ -170,15 +167,15 @@ asmlinkage void __div0(void) error("Attempting division by 0!"); } -extern void do_decompress(u8 *input, int len, u8 *output, void (*error)(char *x)); +extern int do_decompress(u8 *input, int len, u8 *output, void (*error)(char *x)); -unsigned long +void decompress_kernel(unsigned long output_start, unsigned long free_mem_ptr_p, unsigned long free_mem_ptr_end_p, int arch_id) { - unsigned char *tmp; + int ret; output_data = (unsigned char *)output_start; free_mem_ptr = free_mem_ptr_p; @@ -187,12 +184,11 @@ decompress_kernel(unsigned long output_start, unsigned long free_mem_ptr_p, arch_decomp_setup(); - tmp = (unsigned char *) (((unsigned long)input_data_end) - 4); - output_ptr = get_unaligned_le32(tmp); - putstr("Uncompressing Linux..."); - do_decompress(input_data, input_data_end - input_data, - output_data, error); - putstr(" done, booting the kernel.\n"); - return output_ptr; + ret = do_decompress(input_data, input_data_end - input_data, + output_data, error); + if (ret) + error("decompressor returned an error"); + else + putstr(" done, booting the kernel.\n"); } diff --git a/arch/arm/boot/compressed/vmlinux.lds.in b/arch/arm/boot/compressed/vmlinux.lds.in index 5309909d7282..ea80abe78844 100644 --- a/arch/arm/boot/compressed/vmlinux.lds.in +++ b/arch/arm/boot/compressed/vmlinux.lds.in @@ -54,6 +54,7 @@ SECTIONS .bss : { *(.bss) } _end = .; + . = ALIGN(8); /* the stack must be 64-bit aligned */ .stack : { *(.stack) } .stab 0 : { *(.stab) } diff --git a/arch/arm/common/Kconfig b/arch/arm/common/Kconfig index ea5ee4d067f3..4b71766fb21d 100644 --- a/arch/arm/common/Kconfig +++ b/arch/arm/common/Kconfig @@ -7,7 +7,7 @@ config ARM_VIC config ARM_VIC_NR int default 4 if ARCH_S5PV210 - default 3 if ARCH_S5P6442 || ARCH_S5PC100 + default 3 if ARCH_S5PC100 default 2 depends on ARM_VIC help diff --git a/arch/arm/common/gic.c b/arch/arm/common/gic.c index f70ec7dadebb..4ddd0a6ac7ff 100644 --- a/arch/arm/common/gic.c +++ b/arch/arm/common/gic.c @@ -49,7 +49,7 @@ struct gic_chip_data { * Default make them NULL. */ struct irq_chip gic_arch_extn = { - .irq_ack = NULL, + .irq_eoi = NULL, .irq_mask = NULL, .irq_unmask = NULL, .irq_retrigger = NULL, @@ -84,21 +84,12 @@ static inline unsigned int gic_irq(struct irq_data *d) /* * Routines to acknowledge, disable and enable interrupts */ -static void gic_ack_irq(struct irq_data *d) -{ - spin_lock(&irq_controller_lock); - if (gic_arch_extn.irq_ack) - gic_arch_extn.irq_ack(d); - writel(gic_irq(d), gic_cpu_base(d) + GIC_CPU_EOI); - spin_unlock(&irq_controller_lock); -} - static void gic_mask_irq(struct irq_data *d) { u32 mask = 1 << (d->irq % 32); spin_lock(&irq_controller_lock); - writel(mask, gic_dist_base(d) + GIC_DIST_ENABLE_CLEAR + (gic_irq(d) / 32) * 4); + writel_relaxed(mask, gic_dist_base(d) + GIC_DIST_ENABLE_CLEAR + (gic_irq(d) / 32) * 4); if (gic_arch_extn.irq_mask) gic_arch_extn.irq_mask(d); spin_unlock(&irq_controller_lock); @@ -111,10 +102,21 @@ static void gic_unmask_irq(struct irq_data *d) spin_lock(&irq_controller_lock); if (gic_arch_extn.irq_unmask) gic_arch_extn.irq_unmask(d); - writel(mask, gic_dist_base(d) + GIC_DIST_ENABLE_SET + (gic_irq(d) / 32) * 4); + writel_relaxed(mask, gic_dist_base(d) + GIC_DIST_ENABLE_SET + (gic_irq(d) / 32) * 4); spin_unlock(&irq_controller_lock); } +static void gic_eoi_irq(struct irq_data *d) +{ + if (gic_arch_extn.irq_eoi) { + spin_lock(&irq_controller_lock); + gic_arch_extn.irq_eoi(d); + spin_unlock(&irq_controller_lock); + } + + writel_relaxed(gic_irq(d), gic_cpu_base(d) + GIC_CPU_EOI); +} + static int gic_set_type(struct irq_data *d, unsigned int type) { void __iomem *base = gic_dist_base(d); @@ -138,7 +140,7 @@ static int gic_set_type(struct irq_data *d, unsigned int type) if (gic_arch_extn.irq_set_type) gic_arch_extn.irq_set_type(d, type); - val = readl(base + GIC_DIST_CONFIG + confoff); + val = readl_relaxed(base + GIC_DIST_CONFIG + confoff); if (type == IRQ_TYPE_LEVEL_HIGH) val &= ~confmask; else if (type == IRQ_TYPE_EDGE_RISING) @@ -148,15 +150,15 @@ static int gic_set_type(struct irq_data *d, unsigned int type) * As recommended by the spec, disable the interrupt before changing * the configuration */ - if (readl(base + GIC_DIST_ENABLE_SET + enableoff) & enablemask) { - writel(enablemask, base + GIC_DIST_ENABLE_CLEAR + enableoff); + if (readl_relaxed(base + GIC_DIST_ENABLE_SET + enableoff) & enablemask) { + writel_relaxed(enablemask, base + GIC_DIST_ENABLE_CLEAR + enableoff); enabled = true; } - writel(val, base + GIC_DIST_CONFIG + confoff); + writel_relaxed(val, base + GIC_DIST_CONFIG + confoff); if (enabled) - writel(enablemask, base + GIC_DIST_ENABLE_SET + enableoff); + writel_relaxed(enablemask, base + GIC_DIST_ENABLE_SET + enableoff); spin_unlock(&irq_controller_lock); @@ -188,8 +190,8 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, spin_lock(&irq_controller_lock); d->node = cpu; - val = readl(reg) & ~mask; - writel(val | bit, reg); + val = readl_relaxed(reg) & ~mask; + writel_relaxed(val | bit, reg); spin_unlock(&irq_controller_lock); return 0; @@ -218,11 +220,10 @@ static void gic_handle_cascade_irq(unsigned int irq, struct irq_desc *desc) unsigned int cascade_irq, gic_irq; unsigned long status; - /* primary controller ack'ing */ - chip->irq_ack(&desc->irq_data); + chained_irq_enter(chip, desc); spin_lock(&irq_controller_lock); - status = readl(chip_data->cpu_base + GIC_CPU_INTACK); + status = readl_relaxed(chip_data->cpu_base + GIC_CPU_INTACK); spin_unlock(&irq_controller_lock); gic_irq = (status & 0x3ff); @@ -236,15 +237,14 @@ static void gic_handle_cascade_irq(unsigned int irq, struct irq_desc *desc) generic_handle_irq(cascade_irq); out: - /* primary controller unmasking */ - chip->irq_unmask(&desc->irq_data); + chained_irq_exit(chip, desc); } static struct irq_chip gic_chip = { .name = "GIC", - .irq_ack = gic_ack_irq, .irq_mask = gic_mask_irq, .irq_unmask = gic_unmask_irq, + .irq_eoi = gic_eoi_irq, .irq_set_type = gic_set_type, .irq_retrigger = gic_retrigger, #ifdef CONFIG_SMP @@ -272,13 +272,13 @@ static void __init gic_dist_init(struct gic_chip_data *gic, cpumask |= cpumask << 8; cpumask |= cpumask << 16; - writel(0, base + GIC_DIST_CTRL); + writel_relaxed(0, base + GIC_DIST_CTRL); /* * Find out how many interrupts are supported. * The GIC only supports up to 1020 interrupt sources. */ - gic_irqs = readl(base + GIC_DIST_CTR) & 0x1f; + gic_irqs = readl_relaxed(base + GIC_DIST_CTR) & 0x1f; gic_irqs = (gic_irqs + 1) * 32; if (gic_irqs > 1020) gic_irqs = 1020; @@ -287,26 +287,26 @@ static void __init gic_dist_init(struct gic_chip_data *gic, * Set all global interrupts to be level triggered, active low. */ for (i = 32; i < gic_irqs; i += 16) - writel(0, base + GIC_DIST_CONFIG + i * 4 / 16); + writel_relaxed(0, base + GIC_DIST_CONFIG + i * 4 / 16); /* * Set all global interrupts to this CPU only. */ for (i = 32; i < gic_irqs; i += 4) - writel(cpumask, base + GIC_DIST_TARGET + i * 4 / 4); + writel_relaxed(cpumask, base + GIC_DIST_TARGET + i * 4 / 4); /* * Set priority on all global interrupts. */ for (i = 32; i < gic_irqs; i += 4) - writel(0xa0a0a0a0, base + GIC_DIST_PRI + i * 4 / 4); + writel_relaxed(0xa0a0a0a0, base + GIC_DIST_PRI + i * 4 / 4); /* * Disable all interrupts. Leave the PPI and SGIs alone * as these enables are banked registers. */ for (i = 32; i < gic_irqs; i += 32) - writel(0xffffffff, base + GIC_DIST_ENABLE_CLEAR + i * 4 / 32); + writel_relaxed(0xffffffff, base + GIC_DIST_ENABLE_CLEAR + i * 4 / 32); /* * Limit number of interrupts registered to the platform maximum @@ -319,12 +319,12 @@ static void __init gic_dist_init(struct gic_chip_data *gic, * Setup the Linux IRQ subsystem. */ for (i = irq_start; i < irq_limit; i++) { - irq_set_chip_and_handler(i, &gic_chip, handle_level_irq); + irq_set_chip_and_handler(i, &gic_chip, handle_fasteoi_irq); irq_set_chip_data(i, gic); set_irq_flags(i, IRQF_VALID | IRQF_PROBE); } - writel(1, base + GIC_DIST_CTRL); + writel_relaxed(1, base + GIC_DIST_CTRL); } static void __cpuinit gic_cpu_init(struct gic_chip_data *gic) @@ -337,17 +337,17 @@ static void __cpuinit gic_cpu_init(struct gic_chip_data *gic) * Deal with the banked PPI and SGI interrupts - disable all * PPI interrupts, ensure all SGI interrupts are enabled. */ - writel(0xffff0000, dist_base + GIC_DIST_ENABLE_CLEAR); - writel(0x0000ffff, dist_base + GIC_DIST_ENABLE_SET); + writel_relaxed(0xffff0000, dist_base + GIC_DIST_ENABLE_CLEAR); + writel_relaxed(0x0000ffff, dist_base + GIC_DIST_ENABLE_SET); /* * Set priority on PPI and SGI interrupts */ for (i = 0; i < 32; i += 4) - writel(0xa0a0a0a0, dist_base + GIC_DIST_PRI + i * 4 / 4); + writel_relaxed(0xa0a0a0a0, dist_base + GIC_DIST_PRI + i * 4 / 4); - writel(0xf0, base + GIC_CPU_PRIMASK); - writel(1, base + GIC_CPU_CTRL); + writel_relaxed(0xf0, base + GIC_CPU_PRIMASK); + writel_relaxed(1, base + GIC_CPU_CTRL); } void __init gic_init(unsigned int gic_nr, unsigned int irq_start, @@ -391,7 +391,13 @@ void gic_raise_softirq(const struct cpumask *mask, unsigned int irq) { unsigned long map = *cpus_addr(*mask); + /* + * Ensure that stores to Normal memory are visible to the + * other CPUs before issuing the IPI. + */ + dsb(); + /* this always happens on GIC0 */ - writel(map << 16 | irq, gic_data[0].dist_base + GIC_DIST_SOFTINT); + writel_relaxed(map << 16 | irq, gic_data[0].dist_base + GIC_DIST_SOFTINT); } #endif diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c index a12b33c0dc42..9c49a46a2b7a 100644 --- a/arch/arm/common/sa1111.c +++ b/arch/arm/common/sa1111.c @@ -185,14 +185,6 @@ static struct sa1111_dev_info sa1111_devices[] = { }, }; -void __init sa1111_adjust_zones(unsigned long *size, unsigned long *holes) -{ - unsigned int sz = SZ_1M >> PAGE_SHIFT; - - size[1] = size[0] - sz; - size[0] = sz; -} - /* * SA1111 interrupt support. Since clearing an IRQ while there are * active IRQs causes the interrupt output to pulse, the upper levels diff --git a/arch/arm/common/timer-sp.c b/arch/arm/common/timer-sp.c index 6ef3342153b9..41df47875122 100644 --- a/arch/arm/common/timer-sp.c +++ b/arch/arm/common/timer-sp.c @@ -18,53 +18,67 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include #include #include +#include #include #include #include #include -/* - * These timers are currently always setup to be clocked at 1MHz. - */ -#define TIMER_FREQ_KHZ (1000) -#define TIMER_RELOAD (TIMER_FREQ_KHZ * 1000 / HZ) - -static void __iomem *clksrc_base; - -static cycle_t sp804_read(struct clocksource *cs) +static long __init sp804_get_clock_rate(const char *name) { - return ~readl(clksrc_base + TIMER_VALUE); + struct clk *clk; + long rate; + int err; + + clk = clk_get_sys("sp804", name); + if (IS_ERR(clk)) { + pr_err("sp804: %s clock not found: %d\n", name, + (int)PTR_ERR(clk)); + return PTR_ERR(clk); + } + + err = clk_enable(clk); + if (err) { + pr_err("sp804: %s clock failed to enable: %d\n", name, err); + clk_put(clk); + return err; + } + + rate = clk_get_rate(clk); + if (rate < 0) { + pr_err("sp804: %s clock failed to get rate: %ld\n", name, rate); + clk_disable(clk); + clk_put(clk); + } + + return rate; } -static struct clocksource clocksource_sp804 = { - .name = "timer3", - .rating = 200, - .read = sp804_read, - .mask = CLOCKSOURCE_MASK(32), - .flags = CLOCK_SOURCE_IS_CONTINUOUS, -}; - -void __init sp804_clocksource_init(void __iomem *base) +void __init sp804_clocksource_init(void __iomem *base, const char *name) { - struct clocksource *cs = &clocksource_sp804; + long rate = sp804_get_clock_rate(name); - clksrc_base = base; + if (rate < 0) + return; /* setup timer 0 as free-running clocksource */ - writel(0, clksrc_base + TIMER_CTRL); - writel(0xffffffff, clksrc_base + TIMER_LOAD); - writel(0xffffffff, clksrc_base + TIMER_VALUE); + writel(0, base + TIMER_CTRL); + writel(0xffffffff, base + TIMER_LOAD); + writel(0xffffffff, base + TIMER_VALUE); writel(TIMER_CTRL_32BIT | TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC, - clksrc_base + TIMER_CTRL); + base + TIMER_CTRL); - clocksource_register_khz(cs, TIMER_FREQ_KHZ); + clocksource_mmio_init(base + TIMER_VALUE, name, + rate, 200, 32, clocksource_mmio_readl_down); } static void __iomem *clkevt_base; +static unsigned long clkevt_reload; /* * IRQ handler for the timer @@ -90,7 +104,7 @@ static void sp804_set_mode(enum clock_event_mode mode, switch (mode) { case CLOCK_EVT_MODE_PERIODIC: - writel(TIMER_RELOAD, clkevt_base + TIMER_LOAD); + writel(clkevt_reload, clkevt_base + TIMER_LOAD); ctrl |= TIMER_CTRL_PERIODIC | TIMER_CTRL_ENABLE; break; @@ -120,7 +134,6 @@ static int sp804_set_next_event(unsigned long next, } static struct clock_event_device sp804_clockevent = { - .name = "timer0", .shift = 32, .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, .set_mode = sp804_set_mode, @@ -136,17 +149,24 @@ static struct irqaction sp804_timer_irq = { .dev_id = &sp804_clockevent, }; -void __init sp804_clockevents_init(void __iomem *base, unsigned int timer_irq) +void __init sp804_clockevents_init(void __iomem *base, unsigned int irq, + const char *name) { struct clock_event_device *evt = &sp804_clockevent; + long rate = sp804_get_clock_rate(name); + + if (rate < 0) + return; clkevt_base = base; + clkevt_reload = DIV_ROUND_CLOSEST(rate, HZ); - evt->irq = timer_irq; - evt->mult = div_sc(TIMER_FREQ_KHZ, NSEC_PER_MSEC, evt->shift); + evt->name = name; + evt->irq = irq; + evt->mult = div_sc(rate, NSEC_PER_SEC, evt->shift); evt->max_delta_ns = clockevent_delta2ns(0xffffffff, evt); evt->min_delta_ns = clockevent_delta2ns(0xf, evt); - setup_irq(timer_irq, &sp804_timer_irq); + setup_irq(irq, &sp804_timer_irq); clockevents_register_device(evt); } diff --git a/arch/arm/common/vic.c b/arch/arm/common/vic.c index 113085a77123..7aa4262ada7a 100644 --- a/arch/arm/common/vic.c +++ b/arch/arm/common/vic.c @@ -22,17 +22,16 @@ #include #include #include -#include +#include #include #include #include #include -#if defined(CONFIG_PM) +#ifdef CONFIG_PM /** * struct vic_device - VIC PM device - * @sysdev: The system device which is registered. * @irq: The IRQ number for the base of the VIC. * @base: The register base for the VIC. * @resume_sources: A bitmask of interrupts for resume. @@ -43,8 +42,6 @@ * @protect: Save for VIC_PROTECT. */ struct vic_device { - struct sys_device sysdev; - void __iomem *base; int irq; u32 resume_sources; @@ -59,11 +56,6 @@ struct vic_device { static struct vic_device vic_devices[CONFIG_ARM_VIC_NR]; static int vic_id; - -static inline struct vic_device *to_vic(struct sys_device *sys) -{ - return container_of(sys, struct vic_device, sysdev); -} #endif /* CONFIG_PM */ /** @@ -85,10 +77,9 @@ static void vic_init2(void __iomem *base) writel(32, base + VIC_PL190_DEF_VECT_ADDR); } -#if defined(CONFIG_PM) -static int vic_class_resume(struct sys_device *dev) +#ifdef CONFIG_PM +static void resume_one_vic(struct vic_device *vic) { - struct vic_device *vic = to_vic(dev); void __iomem *base = vic->base; printk(KERN_DEBUG "%s: resuming vic at %p\n", __func__, base); @@ -107,13 +98,18 @@ static int vic_class_resume(struct sys_device *dev) writel(vic->soft_int, base + VIC_INT_SOFT); writel(~vic->soft_int, base + VIC_INT_SOFT_CLEAR); - - return 0; } -static int vic_class_suspend(struct sys_device *dev, pm_message_t state) +static void vic_resume(void) +{ + int id; + + for (id = vic_id - 1; id >= 0; id--) + resume_one_vic(vic_devices + id); +} + +static void suspend_one_vic(struct vic_device *vic) { - struct vic_device *vic = to_vic(dev); void __iomem *base = vic->base; printk(KERN_DEBUG "%s: suspending vic at %p\n", __func__, base); @@ -128,14 +124,21 @@ static int vic_class_suspend(struct sys_device *dev, pm_message_t state) writel(vic->resume_irqs, base + VIC_INT_ENABLE); writel(~vic->resume_irqs, base + VIC_INT_ENABLE_CLEAR); +} + +static int vic_suspend(void) +{ + int id; + + for (id = 0; id < vic_id; id++) + suspend_one_vic(vic_devices + id); return 0; } -struct sysdev_class vic_class = { - .name = "vic", - .suspend = vic_class_suspend, - .resume = vic_class_resume, +struct syscore_ops vic_syscore_ops = { + .suspend = vic_suspend, + .resume = vic_resume, }; /** @@ -147,30 +150,8 @@ struct sysdev_class vic_class = { */ static int __init vic_pm_init(void) { - struct vic_device *dev = vic_devices; - int err; - int id; - - if (vic_id == 0) - return 0; - - err = sysdev_class_register(&vic_class); - if (err) { - printk(KERN_ERR "%s: cannot register class\n", __func__); - return err; - } - - for (id = 0; id < vic_id; id++, dev++) { - dev->sysdev.id = id; - dev->sysdev.cls = &vic_class; - - err = sysdev_register(&dev->sysdev); - if (err) { - printk(KERN_ERR "%s: failed to register device\n", - __func__); - return err; - } - } + if (vic_id > 0) + register_syscore_ops(&vic_syscore_ops); return 0; } diff --git a/arch/arm/configs/at572d940hfek_defconfig b/arch/arm/configs/at572d940hfek_defconfig deleted file mode 100644 index 1b1158ae8f82..000000000000 --- a/arch/arm/configs/at572d940hfek_defconfig +++ /dev/null @@ -1,358 +0,0 @@ -CONFIG_EXPERIMENTAL=y -CONFIG_LOCALVERSION="-AT572D940HF" -# CONFIG_LOCALVERSION_AUTO is not set -CONFIG_SYSVIPC=y -CONFIG_POSIX_MQUEUE=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_BSD_PROCESS_ACCT_V3=y -CONFIG_TASKSTATS=y -CONFIG_TASK_XACCT=y -CONFIG_TASK_IO_ACCOUNTING=y -CONFIG_AUDIT=y -CONFIG_CGROUPS=y -CONFIG_CGROUP_CPUACCT=y -CONFIG_CGROUP_SCHED=y -CONFIG_RT_GROUP_SCHED=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_RELAY=y -CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EXPERT=y -CONFIG_SLAB=y -CONFIG_PROFILING=y -CONFIG_OPROFILE=m -CONFIG_KPROBES=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODVERSIONS=y -CONFIG_MODULE_SRCVERSION_ALL=y -# CONFIG_BLK_DEV_BSG is not set -CONFIG_ARCH_AT91=y -CONFIG_ARCH_AT572D940HF=y -CONFIG_MACH_AT572D940HFEB=y -CONFIG_AT91_PROGRAMMABLE_CLOCKS=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_PREEMPT=y -CONFIG_CMDLINE="mem=48M console=ttyS0 initrd=0x21100000,3145728 root=/dev/ram0 rw ip=172.16.1.181" -CONFIG_KEXEC=y -CONFIG_FPE_NWFPE=y -CONFIG_FPE_NWFPE_XP=y -CONFIG_NET=y -CONFIG_PACKET=m -CONFIG_UNIX=y -CONFIG_INET=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set -# CONFIG_INET_DIAG is not set -# CONFIG_IPV6 is not set -CONFIG_NET_PKTGEN=m -CONFIG_NET_TCPPROBE=m -CONFIG_CAN=m -CONFIG_CAN_RAW=m -CONFIG_CAN_BCM=m -CONFIG_CAN_VCAN=m -CONFIG_CAN_DEBUG_DEVICES=y -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -CONFIG_CONNECTOR=m -CONFIG_MTD=m -CONFIG_MTD_DEBUG=y -CONFIG_MTD_DEBUG_VERBOSE=1 -CONFIG_MTD_CONCAT=m -CONFIG_MTD_PARTITIONS=y -CONFIG_MTD_CHAR=m -CONFIG_MTD_BLOCK=m -CONFIG_MTD_BLOCK_RO=m -CONFIG_FTL=m -CONFIG_NFTL=m -CONFIG_NFTL_RW=y -CONFIG_INFTL=m -CONFIG_RFD_FTL=m -CONFIG_SSFDC=m -CONFIG_MTD_OOPS=m -CONFIG_MTD_CFI=m -CONFIG_MTD_JEDECPROBE=m -CONFIG_MTD_CFI_INTELEXT=m -CONFIG_MTD_CFI_AMDSTD=m -CONFIG_MTD_CFI_STAA=m -CONFIG_MTD_ROM=m -CONFIG_MTD_ABSENT=m -CONFIG_MTD_COMPLEX_MAPPINGS=y -CONFIG_MTD_PHYSMAP=m -CONFIG_MTD_PLATRAM=m -CONFIG_MTD_DATAFLASH=m -CONFIG_MTD_M25P80=m -CONFIG_MTD_SLRAM=m -CONFIG_MTD_PHRAM=m -CONFIG_MTD_MTDRAM=m -CONFIG_MTD_BLOCK2MTD=m -CONFIG_MTD_NAND=m -CONFIG_MTD_NAND_VERIFY_WRITE=y -CONFIG_MTD_NAND_DISKONCHIP=m -CONFIG_MTD_NAND_NANDSIM=m -CONFIG_MTD_NAND_PLATFORM=m -CONFIG_MTD_ALAUDA=m -CONFIG_MTD_UBI=m -CONFIG_MTD_UBI_GLUEBI=m -CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_CRYPTOLOOP=m -CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=65536 -CONFIG_ATMEL_TCLIB=y -CONFIG_ATMEL_SSC=m -CONFIG_SENSORS_TSL2550=m -CONFIG_DS1682=m -CONFIG_RAID_ATTRS=m -CONFIG_SCSI=m -CONFIG_SCSI_TGT=m -# CONFIG_SCSI_PROC_FS is not set -CONFIG_BLK_DEV_SD=m -CONFIG_BLK_DEV_SR=m -CONFIG_CHR_DEV_SG=m -CONFIG_CHR_DEV_SCH=m -CONFIG_SCSI_MULTI_LUN=y -CONFIG_SCSI_CONSTANTS=y -CONFIG_SCSI_LOGGING=y -CONFIG_SCSI_SCAN_ASYNC=y -CONFIG_SCSI_ISCSI_ATTRS=m -CONFIG_NETDEVICES=y -CONFIG_DUMMY=m -CONFIG_BONDING=m -CONFIG_MACVLAN=m -CONFIG_EQUALIZER=m -CONFIG_TUN=m -CONFIG_VETH=m -CONFIG_PHYLIB=y -CONFIG_MARVELL_PHY=m -CONFIG_DAVICOM_PHY=m -CONFIG_QSEMI_PHY=m -CONFIG_LXT_PHY=m -CONFIG_CICADA_PHY=m -CONFIG_VITESSE_PHY=m -CONFIG_SMSC_PHY=m -CONFIG_BROADCOM_PHY=m -CONFIG_ICPLUS_PHY=m -CONFIG_MDIO_BITBANG=m -CONFIG_NET_ETHERNET=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set -CONFIG_USB_ZD1201=m -CONFIG_HOSTAP=m -CONFIG_HOSTAP_FIRMWARE=y -CONFIG_HOSTAP_FIRMWARE_NVRAM=y -CONFIG_USB_CATC=m -CONFIG_USB_KAWETH=m -CONFIG_USB_PEGASUS=m -CONFIG_USB_RTL8150=m -CONFIG_USB_USBNET=m -CONFIG_USB_NET_DM9601=m -CONFIG_USB_NET_GL620A=m -CONFIG_USB_NET_PLUSB=m -CONFIG_USB_NET_MCS7830=m -CONFIG_USB_NET_RNDIS_HOST=m -CONFIG_USB_ALI_M5632=y -CONFIG_USB_AN2720=y -CONFIG_USB_EPSON2888=y -CONFIG_USB_KC2190=y -# CONFIG_USB_NET_ZAURUS is not set -CONFIG_INPUT_MOUSEDEV=m -CONFIG_INPUT_EVDEV=m -CONFIG_INPUT_EVBUG=m -CONFIG_KEYBOARD_LKKBD=m -CONFIG_KEYBOARD_GPIO=m -CONFIG_KEYBOARD_NEWTON=m -CONFIG_KEYBOARD_STOWAWAY=m -CONFIG_KEYBOARD_SUNKBD=m -CONFIG_KEYBOARD_XTKBD=m -CONFIG_MOUSE_PS2=m -CONFIG_MOUSE_SERIAL=m -CONFIG_MOUSE_APPLETOUCH=m -CONFIG_MOUSE_VSXXXAA=m -CONFIG_MOUSE_GPIO=m -CONFIG_INPUT_MISC=y -CONFIG_INPUT_UINPUT=m -CONFIG_SERIO_SERPORT=m -CONFIG_SERIO_RAW=m -CONFIG_VT_HW_CONSOLE_BINDING=y -CONFIG_SERIAL_NONSTANDARD=y -CONFIG_N_HDLC=m -CONFIG_SPECIALIX=m -CONFIG_STALDRV=y -CONFIG_SERIAL_ATMEL=y -CONFIG_SERIAL_ATMEL_CONSOLE=y -CONFIG_IPMI_HANDLER=m -CONFIG_IPMI_DEVICE_INTERFACE=m -CONFIG_IPMI_SI=m -CONFIG_IPMI_WATCHDOG=m -CONFIG_IPMI_POWEROFF=m -CONFIG_HW_RANDOM=y -CONFIG_R3964=m -CONFIG_RAW_DRIVER=m -CONFIG_TCG_TPM=m -CONFIG_TCG_NSC=m -CONFIG_TCG_ATMEL=m -CONFIG_I2C=m -CONFIG_I2C_CHARDEV=m -CONFIG_SPI=y -CONFIG_SPI_ATMEL=y -CONFIG_SPI_BITBANG=m -CONFIG_SPI_SPIDEV=m -# CONFIG_HWMON is not set -# CONFIG_VGA_CONSOLE is not set -CONFIG_SOUND=m -CONFIG_SND=m -CONFIG_SND_SEQUENCER=m -CONFIG_SND_SEQ_DUMMY=m -CONFIG_SND_MIXER_OSS=m -CONFIG_SND_PCM_OSS=m -# CONFIG_SND_PCM_OSS_PLUGINS is not set -CONFIG_SND_SEQUENCER_OSS=y -CONFIG_SND_DYNAMIC_MINORS=y -# CONFIG_SND_VERBOSE_PROCFS is not set -CONFIG_SND_DUMMY=m -CONFIG_SND_VIRMIDI=m -CONFIG_SND_USB_AUDIO=m -CONFIG_SND_USB_CAIAQ=m -CONFIG_SND_USB_CAIAQ_INPUT=y -CONFIG_HID=m -CONFIG_HIDRAW=y -CONFIG_USB_HID=m -CONFIG_USB_HIDDEV=y -CONFIG_USB_KBD=m -CONFIG_USB_MOUSE=m -CONFIG_HID_A4TECH=m -CONFIG_HID_APPLE=m -CONFIG_HID_BELKIN=m -CONFIG_HID_CHERRY=m -CONFIG_HID_CHICONY=m -CONFIG_HID_CYPRESS=m -CONFIG_HID_EZKEY=m -CONFIG_HID_GYRATION=m -CONFIG_HID_LOGITECH=m -CONFIG_HID_MICROSOFT=m -CONFIG_HID_MONTEREY=m -CONFIG_HID_PANTHERLORD=m -CONFIG_HID_PETALYNX=m -CONFIG_HID_SAMSUNG=m -CONFIG_HID_SONY=m -CONFIG_HID_SUNPLUS=m -CONFIG_USB=y -CONFIG_USB_DEVICEFS=y -# CONFIG_USB_DEVICE_CLASS is not set -CONFIG_USB_DYNAMIC_MINORS=y -CONFIG_USB_MON=y -CONFIG_USB_OHCI_HCD=y -CONFIG_USB_STORAGE=m -CONFIG_USB_STORAGE_DATAFAB=m -CONFIG_USB_STORAGE_FREECOM=m -CONFIG_USB_STORAGE_ISD200=m -CONFIG_USB_STORAGE_USBAT=m -CONFIG_USB_STORAGE_SDDR09=m -CONFIG_USB_STORAGE_SDDR55=m -CONFIG_USB_STORAGE_JUMPSHOT=m -CONFIG_USB_STORAGE_ALAUDA=m -CONFIG_USB_STORAGE_KARMA=m -CONFIG_USB_LIBUSUAL=y -CONFIG_USB_SERIAL=m -CONFIG_USB_EZUSB=y -CONFIG_USB_SERIAL_GENERIC=y -CONFIG_USB_SERIAL_PL2303=m -CONFIG_USB_SERIAL_SPCP8X5=m -CONFIG_USB_SERIAL_DEBUG=m -CONFIG_USB_EMI62=m -CONFIG_USB_EMI26=m -CONFIG_USB_ADUTUX=m -CONFIG_USB_TEST=m -CONFIG_USB_GADGET=m -CONFIG_USB_GADGET_DEBUG_FILES=y -CONFIG_USB_GADGET_DEBUG_FS=y -CONFIG_USB_ZERO=m -CONFIG_USB_ETH=m -CONFIG_USB_GADGETFS=m -CONFIG_USB_FILE_STORAGE=m -CONFIG_USB_G_SERIAL=m -CONFIG_USB_MIDI_GADGET=m -CONFIG_MMC=y -CONFIG_SDIO_UART=m -CONFIG_MMC_AT91=m -CONFIG_MMC_SPI=m -CONFIG_NEW_LEDS=y -CONFIG_LEDS_CLASS=m -CONFIG_LEDS_GPIO=m -CONFIG_LEDS_TRIGGERS=y -CONFIG_LEDS_TRIGGER_TIMER=m -CONFIG_LEDS_TRIGGER_HEARTBEAT=m -CONFIG_RTC_CLASS=y -CONFIG_RTC_INTF_DEV_UIE_EMUL=y -CONFIG_RTC_DRV_DS1307=m -CONFIG_RTC_DRV_DS1305=y -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y -CONFIG_JBD_DEBUG=y -CONFIG_REISERFS_FS=m -CONFIG_REISERFS_CHECK=y -CONFIG_REISERFS_PROC_INFO=y -CONFIG_REISERFS_FS_XATTR=y -CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_REISERFS_FS_SECURITY=y -CONFIG_INOTIFY=y -CONFIG_FUSE_FS=m -CONFIG_MSDOS_FS=m -CONFIG_VFAT_FS=y -CONFIG_NTFS_FS=m -CONFIG_NTFS_RW=y -CONFIG_TMPFS=y -CONFIG_TMPFS_POSIX_ACL=y -CONFIG_JFFS2_FS=m -CONFIG_JFFS2_COMPRESSION_OPTIONS=y -CONFIG_JFFS2_LZO=y -CONFIG_JFFS2_CMODE_FAVOURLZO=y -CONFIG_CRAMFS=m -CONFIG_NFS_FS=m -CONFIG_NFS_V3=y -CONFIG_NFS_V3_ACL=y -CONFIG_NFS_V4=y -CONFIG_NFSD=m -CONFIG_NFSD_V3_ACL=y -CONFIG_NFSD_V4=y -CONFIG_CIFS=m -CONFIG_CIFS_WEAK_PW_HASH=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_MAC_PARTITION=y -CONFIG_BSD_DISKLABEL=y -CONFIG_MINIX_SUBPARTITION=y -CONFIG_SOLARIS_X86_PARTITION=y -CONFIG_UNIXWARE_DISKLABEL=y -CONFIG_LDM_PARTITION=y -CONFIG_LDM_DEBUG=y -CONFIG_SGI_PARTITION=y -CONFIG_SUN_PARTITION=y -CONFIG_NLS_DEFAULT="cp437" -CONFIG_NLS_CODEPAGE_437=y -CONFIG_NLS_CODEPAGE_850=m -CONFIG_NLS_ASCII=y -CONFIG_NLS_ISO8859_1=y -CONFIG_NLS_UTF8=m -CONFIG_DLM=m -CONFIG_PRINTK_TIME=y -CONFIG_MAGIC_SYSRQ=y -CONFIG_UNUSED_SYMBOLS=y -CONFIG_DEBUG_FS=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y -CONFIG_CRYPTO=y -CONFIG_CRYPTO_GF128MUL=m -CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_MD5=y -# CONFIG_CRYPTO_ANSI_CPRNG is not set -# CONFIG_CRYPTO_HW is not set -CONFIG_CRC_CCITT=m -CONFIG_CRC16=m diff --git a/arch/arm/configs/at91sam9261_defconfig b/arch/arm/configs/at91sam9261_defconfig new file mode 100644 index 000000000000..ade6b2f23116 --- /dev/null +++ b/arch/arm/configs/at91sam9261_defconfig @@ -0,0 +1,158 @@ +CONFIG_EXPERIMENTAL=y +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_KERNEL_LZMA=y +# CONFIG_SWAP is not set +CONFIG_SYSVIPC=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=14 +CONFIG_NAMESPACES=y +CONFIG_EMBEDDED=y +CONFIG_SLAB=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_IOSCHED_DEADLINE is not set +# CONFIG_IOSCHED_CFQ is not set +CONFIG_ARCH_AT91=y +CONFIG_ARCH_AT91SAM9261=y +CONFIG_MACH_AT91SAM9261EK=y +CONFIG_AT91_PROGRAMMABLE_CLOCKS=y +# CONFIG_ARM_THUMB is not set +CONFIG_AEABI=y +# CONFIG_OABI_COMPAT is not set +CONFIG_ZBOOT_ROM_TEXT=0x0 +CONFIG_ZBOOT_ROM_BSS=0x0 +CONFIG_CMDLINE="mem=64M console=ttyS0,115200 initrd=0x21100000,3145728 root=/dev/ram0 rw" +CONFIG_AUTO_ZRELADDR=y +CONFIG_VFP=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +# CONFIG_INET_LRO is not set +# CONFIG_IPV6 is not set +CONFIG_CFG80211=y +CONFIG_LIB80211=y +CONFIG_MAC80211=y +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_MTD=y +CONFIG_MTD_PARTITIONS=y +CONFIG_MTD_CMDLINE_PARTS=y +CONFIG_MTD_BLOCK=y +CONFIG_MTD_NAND=y +CONFIG_MTD_NAND_ATMEL=y +CONFIG_MTD_UBI=y +CONFIG_MTD_UBI_GLUEBI=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=8192 +CONFIG_MISC_DEVICES=y +CONFIG_ATMEL_TCLIB=y +CONFIG_ATMEL_SSC=y +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y +CONFIG_SCSI_MULTI_LUN=y +CONFIG_NETDEVICES=y +CONFIG_NET_ETHERNET=y +CONFIG_DM9000=y +# CONFIG_NETDEV_1000 is not set +# CONFIG_NETDEV_10000 is not set +CONFIG_USB_ZD1201=m +CONFIG_RTL8187=m +CONFIG_LIBERTAS=m +CONFIG_LIBERTAS_USB=m +CONFIG_LIBERTAS_SDIO=m +CONFIG_LIBERTAS_SPI=m +CONFIG_RT2X00=m +CONFIG_RT2500USB=m +CONFIG_RT73USB=m +CONFIG_ZD1211RW=m +CONFIG_INPUT_POLLDEV=m +# CONFIG_INPUT_MOUSEDEV_PSAUX is not set +CONFIG_INPUT_MOUSEDEV_SCREEN_X=240 +CONFIG_INPUT_MOUSEDEV_SCREEN_Y=320 +CONFIG_INPUT_EVDEV=y +# CONFIG_KEYBOARD_ATKBD is not set +CONFIG_KEYBOARD_GPIO=y +# CONFIG_INPUT_MOUSE is not set +CONFIG_INPUT_TOUCHSCREEN=y +CONFIG_TOUCHSCREEN_ADS7846=y +CONFIG_DEVPTS_MULTIPLE_INSTANCES=y +CONFIG_SERIAL_ATMEL=y +CONFIG_SERIAL_ATMEL_CONSOLE=y +CONFIG_HW_RANDOM=y +CONFIG_I2C=y +CONFIG_I2C_CHARDEV=y +CONFIG_I2C_GPIO=y +CONFIG_SPI=y +CONFIG_SPI_ATMEL=y +# CONFIG_HWMON is not set +CONFIG_WATCHDOG=y +CONFIG_WATCHDOG_NOWAYOUT=y +CONFIG_AT91SAM9X_WATCHDOG=y +CONFIG_FB=y +CONFIG_FB_ATMEL=y +CONFIG_BACKLIGHT_LCD_SUPPORT=y +# CONFIG_LCD_CLASS_DEVICE is not set +CONFIG_BACKLIGHT_CLASS_DEVICE=y +CONFIG_BACKLIGHT_ATMEL_LCDC=y +# CONFIG_BACKLIGHT_GENERIC is not set +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y +CONFIG_LOGO=y +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_SND_SEQUENCER=y +CONFIG_SND_MIXER_OSS=y +CONFIG_SND_PCM_OSS=y +# CONFIG_SND_SUPPORT_OLD_API is not set +# CONFIG_SND_VERBOSE_PROCFS is not set +# CONFIG_SND_DRIVERS is not set +# CONFIG_SND_ARM is not set +CONFIG_SND_AT73C213=y +CONFIG_SND_USB_AUDIO=m +# CONFIG_USB_HID is not set +CONFIG_USB=y +CONFIG_USB_DEVICEFS=y +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_STORAGE=y +CONFIG_USB_GADGET=y +CONFIG_USB_ZERO=m +CONFIG_USB_ETH=m +CONFIG_USB_GADGETFS=m +CONFIG_USB_FILE_STORAGE=m +CONFIG_USB_G_SERIAL=m +CONFIG_MMC=y +CONFIG_MMC_AT91=m +CONFIG_NEW_LEDS=y +CONFIG_LEDS_CLASS=y +CONFIG_LEDS_GPIO=y +CONFIG_LEDS_TRIGGERS=y +CONFIG_LEDS_TRIGGER_TIMER=y +CONFIG_LEDS_TRIGGER_HEARTBEAT=y +CONFIG_LEDS_TRIGGER_GPIO=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_AT91SAM9=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_TMPFS=y +CONFIG_UBIFS_FS=y +CONFIG_UBIFS_FS_ADVANCED_COMPR=y +CONFIG_SQUASHFS=y +CONFIG_SQUASHFS_LZO=y +CONFIG_SQUASHFS_XZ=y +CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +CONFIG_ROOT_NFS=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_CODEPAGE_850=y +CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_ISO8859_15=y +CONFIG_NLS_UTF8=y +CONFIG_FTRACE=y +CONFIG_CRC_CCITT=m diff --git a/arch/arm/configs/at91sam9261ek_defconfig b/arch/arm/configs/at91sam9261ek_defconfig deleted file mode 100644 index b46025b66b64..000000000000 --- a/arch/arm/configs/at91sam9261ek_defconfig +++ /dev/null @@ -1,95 +0,0 @@ -CONFIG_EXPERIMENTAL=y -# CONFIG_LOCALVERSION_AUTO is not set -# CONFIG_SWAP is not set -CONFIG_SYSVIPC=y -CONFIG_LOG_BUF_SHIFT=14 -CONFIG_BLK_DEV_INITRD=y -CONFIG_SLAB=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -# CONFIG_BLK_DEV_BSG is not set -# CONFIG_IOSCHED_DEADLINE is not set -# CONFIG_IOSCHED_CFQ is not set -CONFIG_ARCH_AT91=y -CONFIG_ARCH_AT91SAM9261=y -CONFIG_MACH_AT91SAM9261EK=y -CONFIG_AT91_PROGRAMMABLE_CLOCKS=y -# CONFIG_ARM_THUMB is not set -CONFIG_ZBOOT_ROM_TEXT=0x0 -CONFIG_ZBOOT_ROM_BSS=0x0 -CONFIG_CMDLINE="mem=64M console=ttyS0,115200 initrd=0x21100000,3145728 root=/dev/ram0 rw" -CONFIG_FPE_NWFPE=y -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_INET=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_BOOTP=y -# CONFIG_INET_LRO is not set -# CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -CONFIG_MTD=y -CONFIG_MTD_PARTITIONS=y -CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_BLOCK=y -CONFIG_MTD_NAND=y -CONFIG_MTD_NAND_ATMEL=y -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=8192 -CONFIG_ATMEL_SSC=y -CONFIG_SCSI=y -CONFIG_BLK_DEV_SD=y -CONFIG_SCSI_MULTI_LUN=y -CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y -CONFIG_DM9000=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set -# CONFIG_INPUT_MOUSEDEV_PSAUX is not set -# CONFIG_KEYBOARD_ATKBD is not set -CONFIG_KEYBOARD_GPIO=y -# CONFIG_INPUT_MOUSE is not set -CONFIG_INPUT_TOUCHSCREEN=y -CONFIG_TOUCHSCREEN_ADS7846=y -CONFIG_SERIAL_ATMEL=y -CONFIG_SERIAL_ATMEL_CONSOLE=y -CONFIG_HW_RANDOM=y -CONFIG_I2C=y -CONFIG_I2C_CHARDEV=y -CONFIG_I2C_GPIO=y -CONFIG_SPI=y -CONFIG_SPI_ATMEL=y -# CONFIG_HWMON is not set -CONFIG_WATCHDOG=y -CONFIG_WATCHDOG_NOWAYOUT=y -CONFIG_AT91SAM9X_WATCHDOG=y -CONFIG_FB=y -CONFIG_FB_ATMEL=y -# CONFIG_VGA_CONSOLE is not set -# CONFIG_USB_HID is not set -CONFIG_USB=y -CONFIG_USB_DEVICEFS=y -CONFIG_USB_MON=y -CONFIG_USB_OHCI_HCD=y -CONFIG_USB_STORAGE=y -CONFIG_USB_STORAGE_DEBUG=y -CONFIG_USB_GADGET=y -CONFIG_USB_ZERO=m -CONFIG_USB_GADGETFS=m -CONFIG_USB_FILE_STORAGE=m -CONFIG_USB_G_SERIAL=m -CONFIG_MMC=y -CONFIG_MMC_AT91=m -CONFIG_RTC_CLASS=y -CONFIG_RTC_DRV_AT91SAM9=y -CONFIG_EXT2_FS=y -CONFIG_INOTIFY=y -CONFIG_VFAT_FS=y -CONFIG_TMPFS=y -CONFIG_CRAMFS=y -CONFIG_NLS_CODEPAGE_437=y -CONFIG_NLS_CODEPAGE_850=y -CONFIG_NLS_ISO8859_1=y -CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_USER=y -CONFIG_DEBUG_LL=y diff --git a/arch/arm/configs/at91sam9263_defconfig b/arch/arm/configs/at91sam9263_defconfig new file mode 100644 index 000000000000..1cf96264cba1 --- /dev/null +++ b/arch/arm/configs/at91sam9263_defconfig @@ -0,0 +1,168 @@ +CONFIG_EXPERIMENTAL=y +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_KERNEL_LZMA=y +# CONFIG_SWAP is not set +CONFIG_SYSVIPC=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=14 +CONFIG_NAMESPACES=y +CONFIG_EMBEDDED=y +CONFIG_SLAB=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_IOSCHED_DEADLINE is not set +# CONFIG_IOSCHED_CFQ is not set +CONFIG_ARCH_AT91=y +CONFIG_ARCH_AT91SAM9263=y +CONFIG_MACH_AT91SAM9263EK=y +CONFIG_MACH_USB_A9263=y +CONFIG_MACH_NEOCORE926=y +CONFIG_MTD_AT91_DATAFLASH_CARD=y +# CONFIG_ARM_THUMB is not set +CONFIG_AEABI=y +# CONFIG_OABI_COMPAT is not set +CONFIG_ZBOOT_ROM_TEXT=0x0 +CONFIG_ZBOOT_ROM_BSS=0x0 +CONFIG_CMDLINE="mem=64M console=ttyS0,115200 initrd=0x21100000,3145728 root=/dev/ram0 rw" +CONFIG_AUTO_ZRELADDR=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_NET_KEY=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_IP_PNP_RARP=y +CONFIG_NET_IPIP=y +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +# CONFIG_INET_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET_XFRM_MODE_TUNNEL is not set +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_INET_LRO is not set +# CONFIG_INET_DIAG is not set +CONFIG_IPV6=y +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_MTD=y +CONFIG_MTD_PARTITIONS=y +CONFIG_MTD_CMDLINE_PARTS=y +CONFIG_MTD_CHAR=y +CONFIG_MTD_BLOCK=y +CONFIG_NFTL=y +CONFIG_NFTL_RW=y +CONFIG_MTD_DATAFLASH=y +CONFIG_MTD_BLOCK2MTD=y +CONFIG_MTD_NAND=y +CONFIG_MTD_NAND_ATMEL=y +CONFIG_MTD_NAND_ATMEL_ECC_SOFT=y +CONFIG_MTD_UBI=y +CONFIG_MTD_UBI_GLUEBI=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=8192 +CONFIG_MISC_DEVICES=y +CONFIG_ATMEL_PWM=y +CONFIG_ATMEL_TCLIB=y +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y +CONFIG_SCSI_MULTI_LUN=y +CONFIG_NETDEVICES=y +CONFIG_MII=y +CONFIG_SMSC_PHY=y +CONFIG_NET_ETHERNET=y +CONFIG_MACB=y +# CONFIG_NETDEV_1000 is not set +# CONFIG_NETDEV_10000 is not set +CONFIG_USB_ZD1201=m +CONFIG_INPUT_POLLDEV=m +# CONFIG_INPUT_MOUSEDEV_PSAUX is not set +CONFIG_INPUT_MOUSEDEV_SCREEN_X=240 +CONFIG_INPUT_MOUSEDEV_SCREEN_Y=320 +CONFIG_INPUT_EVDEV=y +# CONFIG_KEYBOARD_ATKBD is not set +CONFIG_KEYBOARD_GPIO=y +# CONFIG_INPUT_MOUSE is not set +CONFIG_INPUT_TOUCHSCREEN=y +CONFIG_TOUCHSCREEN_ADS7846=y +CONFIG_LEGACY_PTY_COUNT=4 +CONFIG_SERIAL_ATMEL=y +CONFIG_SERIAL_ATMEL_CONSOLE=y +CONFIG_HW_RANDOM=y +CONFIG_I2C=y +CONFIG_I2C_CHARDEV=y +CONFIG_I2C_GPIO=y +CONFIG_SPI=y +CONFIG_SPI_ATMEL=y +# CONFIG_HWMON is not set +CONFIG_WATCHDOG=y +CONFIG_WATCHDOG_NOWAYOUT=y +CONFIG_AT91SAM9X_WATCHDOG=y +CONFIG_FB=y +CONFIG_FB_ATMEL=y +CONFIG_BACKLIGHT_LCD_SUPPORT=y +CONFIG_LCD_CLASS_DEVICE=y +CONFIG_BACKLIGHT_CLASS_DEVICE=y +CONFIG_BACKLIGHT_ATMEL_LCDC=y +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y +CONFIG_LOGO=y +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_SND_SEQUENCER=y +CONFIG_SND_MIXER_OSS=y +CONFIG_SND_PCM_OSS=y +# CONFIG_SND_SUPPORT_OLD_API is not set +# CONFIG_SND_VERBOSE_PROCFS is not set +# CONFIG_SND_DRIVERS is not set +# CONFIG_SND_ARM is not set +CONFIG_SND_ATMEL_AC97C=y +# CONFIG_SND_SPI is not set +CONFIG_SND_USB_AUDIO=m +CONFIG_USB=y +CONFIG_USB_DEVICEFS=y +CONFIG_USB_MON=y +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_STORAGE=y +CONFIG_USB_GADGET=y +CONFIG_USB_ZERO=m +CONFIG_USB_ETH=m +CONFIG_USB_GADGETFS=m +CONFIG_USB_FILE_STORAGE=m +CONFIG_USB_G_SERIAL=m +CONFIG_MMC=y +CONFIG_SDIO_UART=m +CONFIG_MMC_AT91=m +CONFIG_NEW_LEDS=y +CONFIG_LEDS_CLASS=y +CONFIG_LEDS_ATMEL_PWM=y +CONFIG_LEDS_GPIO=y +CONFIG_LEDS_TRIGGERS=y +CONFIG_LEDS_TRIGGER_HEARTBEAT=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_AT91SAM9=y +CONFIG_EXT2_FS=y +CONFIG_FUSE_FS=m +CONFIG_VFAT_FS=y +CONFIG_TMPFS=y +CONFIG_JFFS2_FS=y +CONFIG_UBIFS_FS=y +CONFIG_UBIFS_FS_ADVANCED_COMPR=y +CONFIG_CRAMFS=y +CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +CONFIG_NFS_V3_ACL=y +CONFIG_NFS_V4=y +CONFIG_ROOT_NFS=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_CODEPAGE_850=y +CONFIG_NLS_ISO8859_1=y +CONFIG_FTRACE=y +CONFIG_DEBUG_USER=y +CONFIG_XZ_DEC=y diff --git a/arch/arm/configs/at91sam9263ek_defconfig b/arch/arm/configs/at91sam9263ek_defconfig deleted file mode 100644 index 8a04d6f4e065..000000000000 --- a/arch/arm/configs/at91sam9263ek_defconfig +++ /dev/null @@ -1,106 +0,0 @@ -CONFIG_EXPERIMENTAL=y -# CONFIG_LOCALVERSION_AUTO is not set -# CONFIG_SWAP is not set -CONFIG_SYSVIPC=y -CONFIG_LOG_BUF_SHIFT=14 -CONFIG_BLK_DEV_INITRD=y -CONFIG_SLAB=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -# CONFIG_BLK_DEV_BSG is not set -# CONFIG_IOSCHED_DEADLINE is not set -# CONFIG_IOSCHED_CFQ is not set -CONFIG_ARCH_AT91=y -CONFIG_ARCH_AT91SAM9263=y -CONFIG_MACH_AT91SAM9263EK=y -CONFIG_MTD_AT91_DATAFLASH_CARD=y -# CONFIG_ARM_THUMB is not set -CONFIG_ZBOOT_ROM_TEXT=0x0 -CONFIG_ZBOOT_ROM_BSS=0x0 -CONFIG_CMDLINE="mem=64M console=ttyS0,115200 initrd=0x21100000,3145728 root=/dev/ram0 rw" -CONFIG_FPE_NWFPE=y -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_INET=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_BOOTP=y -CONFIG_IP_PNP_RARP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set -# CONFIG_INET_DIAG is not set -# CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -CONFIG_MTD=y -CONFIG_MTD_PARTITIONS=y -CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y -CONFIG_MTD_BLOCK=y -CONFIG_MTD_DATAFLASH=y -CONFIG_MTD_NAND=y -CONFIG_MTD_NAND_ATMEL=y -CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=8192 -CONFIG_ATMEL_SSC=y -CONFIG_SCSI=y -CONFIG_BLK_DEV_SD=y -CONFIG_SCSI_MULTI_LUN=y -CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y -CONFIG_MII=y -CONFIG_MACB=y -# CONFIG_INPUT_MOUSEDEV_PSAUX is not set -CONFIG_INPUT_EVDEV=y -# CONFIG_KEYBOARD_ATKBD is not set -CONFIG_KEYBOARD_GPIO=y -# CONFIG_INPUT_MOUSE is not set -CONFIG_INPUT_TOUCHSCREEN=y -CONFIG_TOUCHSCREEN_ADS7846=y -# CONFIG_SERIO is not set -CONFIG_SERIAL_ATMEL=y -CONFIG_SERIAL_ATMEL_CONSOLE=y -CONFIG_HW_RANDOM=y -CONFIG_I2C=y -CONFIG_I2C_CHARDEV=y -CONFIG_I2C_GPIO=y -CONFIG_SPI=y -CONFIG_SPI_ATMEL=y -# CONFIG_HWMON is not set -CONFIG_WATCHDOG=y -CONFIG_WATCHDOG_NOWAYOUT=y -CONFIG_AT91SAM9X_WATCHDOG=y -CONFIG_FB=y -CONFIG_FB_ATMEL=y -# CONFIG_VGA_CONSOLE is not set -# CONFIG_USB_HID is not set -CONFIG_USB=y -CONFIG_USB_DEVICEFS=y -CONFIG_USB_MON=y -CONFIG_USB_OHCI_HCD=y -CONFIG_USB_STORAGE=y -CONFIG_USB_GADGET=y -CONFIG_USB_ZERO=m -CONFIG_USB_GADGETFS=m -CONFIG_USB_FILE_STORAGE=m -CONFIG_USB_G_SERIAL=m -CONFIG_MMC=y -CONFIG_MMC_AT91=m -CONFIG_RTC_CLASS=y -CONFIG_RTC_DRV_AT91SAM9=y -CONFIG_EXT2_FS=y -CONFIG_INOTIFY=y -CONFIG_VFAT_FS=y -CONFIG_TMPFS=y -CONFIG_JFFS2_FS=y -CONFIG_CRAMFS=y -CONFIG_NFS_FS=y -CONFIG_ROOT_NFS=y -CONFIG_NLS_CODEPAGE_437=y -CONFIG_NLS_CODEPAGE_850=y -CONFIG_NLS_ISO8859_1=y -CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_USER=y -CONFIG_DEBUG_LL=y diff --git a/arch/arm/configs/at91x40_defconfig b/arch/arm/configs/at91x40_defconfig new file mode 100644 index 000000000000..c55e9212fcbb --- /dev/null +++ b/arch/arm/configs/at91x40_defconfig @@ -0,0 +1,48 @@ +CONFIG_EXPERIMENTAL=y +CONFIG_LOG_BUF_SHIFT=14 +CONFIG_EMBEDDED=y +# CONFIG_HOTPLUG is not set +# CONFIG_ELF_CORE is not set +# CONFIG_FUTEX is not set +# CONFIG_TIMERFD is not set +# CONFIG_VM_EVENT_COUNTERS is not set +# CONFIG_COMPAT_BRK is not set +CONFIG_SLAB=y +# CONFIG_LBDAF is not set +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_IOSCHED_DEADLINE is not set +# CONFIG_IOSCHED_CFQ is not set +# CONFIG_MMU is not set +CONFIG_ARCH_AT91=y +CONFIG_ARCH_AT91X40=y +CONFIG_MACH_AT91EB01=y +CONFIG_AT91_EARLY_USART0=y +CONFIG_CPU_ARM7TDMI=y +CONFIG_SET_MEM_PARAM=y +CONFIG_DRAM_BASE=0x01000000 +CONFIG_DRAM_SIZE=0x00400000 +CONFIG_FLASH_MEM_BASE=0x01400000 +CONFIG_PROCESSOR_ID=0x14000040 +CONFIG_ZBOOT_ROM_TEXT=0x0 +CONFIG_ZBOOT_ROM_BSS=0x0 +CONFIG_BINFMT_FLAT=y +# CONFIG_SUSPEND is not set +# CONFIG_FW_LOADER is not set +CONFIG_MTD=y +CONFIG_MTD_PARTITIONS=y +CONFIG_MTD_CHAR=y +CONFIG_MTD_BLOCK=y +CONFIG_MTD_RAM=y +CONFIG_MTD_ROM=y +CONFIG_BLK_DEV_RAM=y +# CONFIG_INPUT is not set +# CONFIG_SERIO is not set +# CONFIG_VT is not set +# CONFIG_DEVKMEM is not set +# CONFIG_HW_RANDOM is not set +# CONFIG_HWMON is not set +# CONFIG_USB_SUPPORT is not set +CONFIG_EXT2_FS=y +# CONFIG_DNOTIFY is not set +CONFIG_ROMFS_FS=y +# CONFIG_ENABLE_MUST_CHECK is not set diff --git a/arch/arm/configs/davinci_all_defconfig b/arch/arm/configs/davinci_all_defconfig index 889922ad229c..67b5abb6f857 100644 --- a/arch/arm/configs/davinci_all_defconfig +++ b/arch/arm/configs/davinci_all_defconfig @@ -157,7 +157,7 @@ CONFIG_LEDS_GPIO=m CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_TIMER=m CONFIG_LEDS_TRIGGER_HEARTBEAT=m -CONFIG_RTC_CLASS=m +CONFIG_RTC_CLASS=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y CONFIG_XFS_FS=m diff --git a/arch/arm/configs/dove_defconfig b/arch/arm/configs/dove_defconfig index 54bf5eec8016..40db34cf2771 100644 --- a/arch/arm/configs/dove_defconfig +++ b/arch/arm/configs/dove_defconfig @@ -8,8 +8,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set CONFIG_ARCH_DOVE=y CONFIG_MACH_DOVE_DB=y -CONFIG_CPU_V6=y -CONFIG_CPU_32v6K=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_AEABI=y @@ -44,7 +42,6 @@ CONFIG_MTD_UBI=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=1 -# CONFIG_MISC_DEVICES is not set # CONFIG_SCSI_PROC_FS is not set CONFIG_BLK_DEV_SD=y # CONFIG_SCSI_LOWLEVEL is not set @@ -59,12 +56,12 @@ CONFIG_INPUT_EVDEV=y # CONFIG_KEYBOARD_ATKBD is not set # CONFIG_MOUSE_PS2 is not set # CONFIG_SERIO is not set +CONFIG_LEGACY_PTY_COUNT=16 # CONFIG_DEVKMEM is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y # CONFIG_SERIAL_8250_PCI is not set CONFIG_SERIAL_8250_RUNTIME_UARTS=2 -CONFIG_LEGACY_PTY_COUNT=16 # CONFIG_HW_RANDOM is not set CONFIG_I2C=y CONFIG_I2C_CHARDEV=y @@ -72,12 +69,10 @@ CONFIG_I2C_MV64XXX=y CONFIG_SPI=y CONFIG_SPI_ORION=y # CONFIG_HWMON is not set -# CONFIG_VGA_CONSOLE is not set CONFIG_USB=y CONFIG_USB_DEVICEFS=y CONFIG_USB_EHCI_HCD=y CONFIG_USB_EHCI_ROOT_HUB_TT=y -CONFIG_USB_EHCI_TT_NEWSCHED=y CONFIG_USB_STORAGE=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_MV=y @@ -86,7 +81,6 @@ CONFIG_MV_XOR=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_FS_XATTR is not set -CONFIG_INOTIFY=y CONFIG_ISO9660_FS=y CONFIG_JOLIET=y CONFIG_UDF_FS=m @@ -110,23 +104,19 @@ CONFIG_DEBUG_KERNEL=y CONFIG_TIMER_STATS=y # CONFIG_DEBUG_BUGVERBOSE is not set CONFIG_DEBUG_INFO=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_DEBUG_USER=y CONFIG_DEBUG_ERRORS=y CONFIG_CRYPTO_NULL=y -CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_MD4=y -CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_SHA1=y CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_SHA512=y CONFIG_CRYPTO_AES=y CONFIG_CRYPTO_BLOWFISH=y -CONFIG_CRYPTO_DES=y CONFIG_CRYPTO_TEA=y CONFIG_CRYPTO_TWOFISH=y CONFIG_CRYPTO_DEFLATE=y diff --git a/arch/arm/configs/exynos4_defconfig b/arch/arm/configs/exynos4_defconfig index 2ffba24d2e2a..da53ff3b4d70 100644 --- a/arch/arm/configs/exynos4_defconfig +++ b/arch/arm/configs/exynos4_defconfig @@ -8,7 +8,9 @@ CONFIG_ARCH_EXYNOS4=y CONFIG_S3C_LOWLEVEL_UART_PORT=1 CONFIG_MACH_SMDKC210=y CONFIG_MACH_SMDKV310=y +CONFIG_MACH_ARMLEX4210=y CONFIG_MACH_UNIVERSAL_C210=y +CONFIG_MACH_NURI=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_SMP=y diff --git a/arch/arm/configs/mx1_defconfig b/arch/arm/configs/mx1_defconfig index b39b5ced8a10..c9436d0bf593 100644 --- a/arch/arm/configs/mx1_defconfig +++ b/arch/arm/configs/mx1_defconfig @@ -15,6 +15,7 @@ CONFIG_ARCH_MXC=y CONFIG_ARCH_MX1=y CONFIG_ARCH_MX1ADS=y CONFIG_MACH_SCB9328=y +CONFIG_MACH_APF9328=y CONFIG_MXC_IRQ_PRIOR=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y diff --git a/arch/arm/configs/mx51_defconfig b/arch/arm/configs/mx51_defconfig index e3c903281f70..0ace16cba9b5 100644 --- a/arch/arm/configs/mx51_defconfig +++ b/arch/arm/configs/mx51_defconfig @@ -13,7 +13,7 @@ CONFIG_MODULE_SRCVERSION_ALL=y # CONFIG_LBDAF is not set # CONFIG_BLK_DEV_BSG is not set CONFIG_ARCH_MXC=y -CONFIG_ARCH_MX5=y +CONFIG_ARCH_MX51=y CONFIG_MACH_MX51_BABBAGE=y CONFIG_MACH_MX51_3DS=y CONFIG_MACH_EUKREA_CPUIMX51=y diff --git a/arch/arm/configs/mxs_defconfig b/arch/arm/configs/mxs_defconfig new file mode 100644 index 000000000000..2bf224310fb4 --- /dev/null +++ b/arch/arm/configs/mxs_defconfig @@ -0,0 +1,129 @@ +CONFIG_EXPERIMENTAL=y +CONFIG_SYSVIPC=y +CONFIG_TASKSTATS=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_XACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +# CONFIG_UTS_NS is not set +# CONFIG_IPC_NS is not set +# CONFIG_USER_NS is not set +# CONFIG_PID_NS is not set +# CONFIG_NET_NS is not set +CONFIG_PERF_EVENTS=y +# CONFIG_COMPAT_BRK is not set +CONFIG_MODULES=y +CONFIG_MODULE_FORCE_LOAD=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y +CONFIG_MODVERSIONS=y +CONFIG_BLK_DEV_INTEGRITY=y +# CONFIG_IOSCHED_DEADLINE is not set +# CONFIG_IOSCHED_CFQ is not set +CONFIG_ARCH_MXS=y +CONFIG_MACH_STMP378X_DEVB=y +CONFIG_MACH_TX28=y +# CONFIG_ARM_THUMB is not set +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_PREEMPT_VOLUNTARY=y +CONFIG_AEABI=y +CONFIG_DEFAULT_MMAP_MIN_ADDR=65536 +CONFIG_AUTO_ZRELADDR=y +CONFIG_FPE_NWFPE=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_SYN_COOKIES=y +# CONFIG_INET_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET_XFRM_MODE_TUNNEL is not set +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_INET_LRO is not set +# CONFIG_INET_DIAG is not set +# CONFIG_IPV6 is not set +CONFIG_CAN=m +CONFIG_CAN_RAW=m +CONFIG_CAN_BCM=m +CONFIG_CAN_DEV=m +CONFIG_CAN_FLEXCAN=m +# CONFIG_WIRELESS is not set +CONFIG_DEVTMPFS=y +# CONFIG_FIRMWARE_IN_KERNEL is not set +# CONFIG_BLK_DEV is not set +CONFIG_NETDEVICES=y +CONFIG_NET_ETHERNET=y +CONFIG_ENC28J60=y +# CONFIG_NETDEV_1000 is not set +# CONFIG_NETDEV_10000 is not set +# CONFIG_WLAN is not set +# CONFIG_INPUT_MOUSEDEV_PSAUX is not set +CONFIG_INPUT_EVDEV=m +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +CONFIG_INPUT_TOUCHSCREEN=y +CONFIG_TOUCHSCREEN_TSC2007=m +# CONFIG_SERIO is not set +CONFIG_VT_HW_CONSOLE_BINDING=y +CONFIG_DEVPTS_MULTIPLE_INSTANCES=y +# CONFIG_LEGACY_PTYS is not set +# CONFIG_DEVKMEM is not set +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +# CONFIG_HW_RANDOM is not set +CONFIG_I2C=m +# CONFIG_I2C_COMPAT is not set +CONFIG_I2C_CHARDEV=m +CONFIG_I2C_MXS=m +CONFIG_SPI=y +CONFIG_SPI_GPIO=m +CONFIG_DEBUG_GPIO=y +CONFIG_GPIO_SYSFS=y +# CONFIG_HWMON is not set +# CONFIG_MFD_SUPPORT is not set +CONFIG_DISPLAY_SUPPORT=m +# CONFIG_HID_SUPPORT is not set +# CONFIG_USB_SUPPORT is not set +CONFIG_MMC=y +CONFIG_MMC_MXS=y +CONFIG_RTC_CLASS=m +CONFIG_RTC_DRV_DS1307=m +CONFIG_DMADEVICES=y +CONFIG_MXS_DMA=y +CONFIG_EXT3_FS=y +# CONFIG_DNOTIFY is not set +CONFIG_FSCACHE=m +CONFIG_FSCACHE_STATS=y +CONFIG_CACHEFILES=m +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +# CONFIG_MISC_FILESYSTEMS is not set +CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +CONFIG_NFS_V3_ACL=y +CONFIG_NFS_V4=y +CONFIG_ROOT_NFS=y +CONFIG_PRINTK_TIME=y +CONFIG_FRAME_WARN=2048 +CONFIG_MAGIC_SYSRQ=y +CONFIG_UNUSED_SYMBOLS=y +CONFIG_DEBUG_KERNEL=y +CONFIG_LOCKUP_DETECTOR=y +CONFIG_DETECT_HUNG_TASK=y +CONFIG_TIMER_STATS=y +CONFIG_PROVE_LOCKING=y +CONFIG_DEBUG_SPINLOCK_SLEEP=y +CONFIG_DEBUG_INFO=y +CONFIG_SYSCTL_SYSCALL_CHECK=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_STRICT_DEVMEM=y +CONFIG_DEBUG_USER=y +CONFIG_CRYPTO=y +CONFIG_CRYPTO_CRC32C=m +# CONFIG_CRYPTO_ANSI_CPRNG is not set +# CONFIG_CRYPTO_HW is not set +CONFIG_CRC_ITU_T=m +CONFIG_CRC7=m diff --git a/arch/arm/configs/neocore926_defconfig b/arch/arm/configs/neocore926_defconfig deleted file mode 100644 index 462dd1850d15..000000000000 --- a/arch/arm/configs/neocore926_defconfig +++ /dev/null @@ -1,104 +0,0 @@ -CONFIG_EXPERIMENTAL=y -# CONFIG_LOCALVERSION_AUTO is not set -# CONFIG_SWAP is not set -CONFIG_SYSVIPC=y -CONFIG_BLK_DEV_INITRD=y -# CONFIG_COMPAT_BRK is not set -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -# CONFIG_BLK_DEV_BSG is not set -# CONFIG_IOSCHED_DEADLINE is not set -# CONFIG_IOSCHED_CFQ is not set -CONFIG_ARCH_AT91=y -CONFIG_ARCH_AT91SAM9263=y -CONFIG_MACH_NEOCORE926=y -CONFIG_MTD_AT91_DATAFLASH_CARD=y -CONFIG_ZBOOT_ROM_TEXT=0x0 -CONFIG_ZBOOT_ROM_BSS=0x0 -CONFIG_FPE_NWFPE=y -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_NET_KEY=y -CONFIG_INET=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -CONFIG_IP_PNP_BOOTP=y -CONFIG_IP_PNP_RARP=y -CONFIG_NET_IPIP=y -# CONFIG_INET_LRO is not set -CONFIG_IPV6=y -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -# CONFIG_PREVENT_FIRMWARE_BUILD is not set -CONFIG_MTD=y -CONFIG_MTD_PARTITIONS=y -CONFIG_MTD_CHAR=y -CONFIG_MTD_BLOCK=y -CONFIG_NFTL=y -CONFIG_NFTL_RW=y -CONFIG_MTD_BLOCK2MTD=y -CONFIG_MTD_NAND=y -CONFIG_MTD_NAND_ECC_SMC=y -CONFIG_MTD_NAND_VERIFY_WRITE=y -CONFIG_MTD_NAND_ATMEL=y -CONFIG_MTD_NAND_PLATFORM=y -CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_NBD=y -CONFIG_ATMEL_PWM=y -CONFIG_ATMEL_TCLIB=y -CONFIG_SCSI=y -CONFIG_CHR_DEV_SG=y -CONFIG_NETDEVICES=y -CONFIG_SMSC_PHY=y -CONFIG_NET_ETHERNET=y -CONFIG_MACB=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set -# CONFIG_INPUT_MOUSEDEV_PSAUX is not set -CONFIG_INPUT_EVDEV=y -CONFIG_INPUT_TOUCHSCREEN=y -CONFIG_TOUCHSCREEN_ADS7846=y -CONFIG_VT_HW_CONSOLE_BINDING=y -# CONFIG_DEVKMEM is not set -CONFIG_SERIAL_NONSTANDARD=y -CONFIG_SERIAL_ATMEL=y -CONFIG_SERIAL_ATMEL_CONSOLE=y -# CONFIG_SERIAL_ATMEL_PDC is not set -# CONFIG_HW_RANDOM is not set -CONFIG_I2C=y -CONFIG_I2C_CHARDEV=y -CONFIG_SPI=y -CONFIG_SPI_ATMEL=y -# CONFIG_HWMON is not set -CONFIG_VIDEO_OUTPUT_CONTROL=y -CONFIG_FB=y -CONFIG_FB_ATMEL=y -CONFIG_BACKLIGHT_LCD_SUPPORT=y -CONFIG_LCD_CLASS_DEVICE=y -CONFIG_BACKLIGHT_CLASS_DEVICE=y -CONFIG_BACKLIGHT_ATMEL_LCDC=y -# CONFIG_VGA_CONSOLE is not set -CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y -CONFIG_LOGO=y -CONFIG_USB=y -CONFIG_USB_DEVICEFS=y -CONFIG_USB_MON=y -CONFIG_USB_OHCI_HCD=y -CONFIG_USB_STORAGE=y -CONFIG_MMC=y -CONFIG_SDIO_UART=y -CONFIG_MMC_AT91=m -CONFIG_EXT2_FS=y -# CONFIG_DNOTIFY is not set -CONFIG_AUTOFS_FS=y -CONFIG_VFAT_FS=y -CONFIG_TMPFS=y -CONFIG_JFFS2_FS=y -CONFIG_JFFS2_FS_WBUF_VERIFY=y -CONFIG_NFS_FS=y -CONFIG_ROOT_NFS=y -# CONFIG_ENABLE_WARN_DEPRECATED is not set -# CONFIG_ENABLE_MUST_CHECK is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y -# CONFIG_CRYPTO_HW is not set diff --git a/arch/arm/configs/netx_defconfig b/arch/arm/configs/netx_defconfig index 316af5479d90..9c0ad7993986 100644 --- a/arch/arm/configs/netx_defconfig +++ b/arch/arm/configs/netx_defconfig @@ -60,7 +60,7 @@ CONFIG_FB_ARMCLCD=y # CONFIG_VGA_CONSOLE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y -CONFIG_RTC_CLASS=m +CONFIG_RTC_CLASS=y CONFIG_INOTIFY=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y diff --git a/arch/arm/configs/ns9xxx_defconfig b/arch/arm/configs/ns9xxx_defconfig deleted file mode 100644 index 1f528a002983..000000000000 --- a/arch/arm/configs/ns9xxx_defconfig +++ /dev/null @@ -1,56 +0,0 @@ -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_BLK_DEV_INITRD=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -# CONFIG_IOSCHED_DEADLINE is not set -# CONFIG_IOSCHED_CFQ is not set -CONFIG_ARCH_NS9XXX=y -CONFIG_MACH_CC9P9360DEV=y -CONFIG_MACH_CC9P9360JS=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_FPE_NWFPE=y -CONFIG_NET=y -CONFIG_PACKET=m -CONFIG_INET=y -CONFIG_IP_PNP=y -CONFIG_SYN_COOKIES=y -CONFIG_MTD=m -CONFIG_MTD_CONCAT=m -CONFIG_MTD_CHAR=m -CONFIG_MTD_BLOCK=m -CONFIG_MTD_CFI=m -CONFIG_MTD_JEDECPROBE=m -CONFIG_MTD_CFI_AMDSTD=m -CONFIG_MTD_PHYSMAP=m -CONFIG_BLK_DEV_LOOP=m -CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y -# CONFIG_SERIO_SERPORT is not set -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -# CONFIG_LEGACY_PTYS is not set -# CONFIG_HW_RANDOM is not set -CONFIG_I2C=m -CONFIG_I2C_GPIO=m -# CONFIG_HWMON is not set -# CONFIG_VGA_CONSOLE is not set -# CONFIG_USB_SUPPORT is not set -CONFIG_NEW_LEDS=y -CONFIG_LEDS_CLASS=m -CONFIG_LEDS_GPIO=m -CONFIG_LEDS_TRIGGERS=y -CONFIG_LEDS_TRIGGER_TIMER=m -CONFIG_LEDS_TRIGGER_HEARTBEAT=m -CONFIG_RTC_CLASS=m -CONFIG_EXT2_FS=m -CONFIG_TMPFS=y -CONFIG_JFFS2_FS=m -CONFIG_NFS_FS=y -CONFIG_ROOT_NFS=y -# CONFIG_ENABLE_MUST_CHECK is not set -CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_INFO=y -CONFIG_DEBUG_USER=y -CONFIG_DEBUG_ERRORS=y diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 076db52ff672..d5f00d7eb075 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -21,58 +21,22 @@ CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y # CONFIG_BLK_DEV_BSG is not set CONFIG_ARCH_OMAP=y -CONFIG_ARCH_OMAP2=y -CONFIG_ARCH_OMAP3=y -CONFIG_ARCH_OMAP4=y CONFIG_OMAP_RESET_CLOCKS=y CONFIG_OMAP_MUX_DEBUG=y -CONFIG_OMAP_32K_TIMER=y -CONFIG_MACH_OMAP_GENERIC=y -CONFIG_ARCH_OMAP2420=y -CONFIG_ARCH_OMAP2430=y -CONFIG_ARCH_OMAP3430=y -CONFIG_MACH_OMAP_H4=y -CONFIG_MACH_OMAP_APOLLON=y -CONFIG_MACH_OMAP_2430SDP=y -CONFIG_MACH_OMAP3_BEAGLE=y -CONFIG_MACH_DEVKIT8000=y -CONFIG_MACH_OMAP_LDP=y -CONFIG_MACH_OVERO=y -CONFIG_MACH_OMAP3EVM=y -CONFIG_MACH_OMAP3517EVM=y -CONFIG_MACH_OMAP3_PANDORA=y -CONFIG_MACH_OMAP3_TOUCHBOOK=y -CONFIG_MACH_OMAP_3430SDP=y -CONFIG_MACH_NOKIA_N8X0=y -CONFIG_MACH_NOKIA_RX51=y -CONFIG_MACH_OMAP_ZOOM2=y -CONFIG_MACH_OMAP_ZOOM3=y -CONFIG_MACH_CM_T35=y -CONFIG_MACH_IGEP0020=y -CONFIG_MACH_SBC3530=y -CONFIG_MACH_OMAP_3630SDP=y -CONFIG_MACH_OMAP_4430SDP=y CONFIG_ARM_THUMBEE=y -CONFIG_ARM_L1_CACHE_SHIFT=5 CONFIG_ARM_ERRATA_411920=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_SMP=y CONFIG_NR_CPUS=2 -# CONFIG_LOCAL_TIMERS is not set -CONFIG_AEABI=y CONFIG_LEDS=y CONFIG_ZBOOT_ROM_TEXT=0x0 CONFIG_ZBOOT_ROM_BSS=0x0 CONFIG_CMDLINE="root=/dev/mmcblk0p2 rootwait console=ttyO2,115200" CONFIG_KEXEC=y CONFIG_FPE_NWFPE=y -CONFIG_VFP=y -CONFIG_NEON=y CONFIG_BINFMT_MISC=y -CONFIG_PM=y CONFIG_PM_DEBUG=y -CONFIG_PM_RUNTIME=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -89,14 +53,6 @@ CONFIG_IP_PNP_RARP=y # CONFIG_IPV6 is not set CONFIG_NETFILTER=y CONFIG_BT=m -CONFIG_BT_L2CAP=m -CONFIG_BT_SCO=m -CONFIG_BT_RFCOMM=y -CONFIG_BT_RFCOMM_TTY=y -CONFIG_BT_BNEP=m -CONFIG_BT_BNEP_MC_FILTER=y -CONFIG_BT_BNEP_PROTO_FILTER=y -CONFIG_BT_HIDP=m CONFIG_BT_HCIUART=m CONFIG_BT_HCIUART_H4=y CONFIG_BT_HCIUART_BCSP=y @@ -107,11 +63,9 @@ CONFIG_CFG80211=m CONFIG_MAC80211=m CONFIG_MAC80211_RC_PID=y CONFIG_MAC80211_RC_DEFAULT_PID=y -CONFIG_MAC80211_LEDS=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y -CONFIG_MTD_CONCAT=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y @@ -127,7 +81,6 @@ CONFIG_MTD_UBI=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=16384 -CONFIG_EEPROM_LEGACY=y CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_SCSI_MULTI_LUN=y @@ -158,19 +111,15 @@ CONFIG_TOUCHSCREEN_ADS7846=y CONFIG_INPUT_MISC=y CONFIG_INPUT_TWL4030_PWRBUTTON=y CONFIG_VT_HW_CONSOLE_BINDING=y -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y +# CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_8250_NR_UARTS=32 CONFIG_SERIAL_8250_EXTENDED=y CONFIG_SERIAL_8250_MANY_PORTS=y CONFIG_SERIAL_8250_SHARE_IRQ=y CONFIG_SERIAL_8250_DETECT_IRQ=y CONFIG_SERIAL_8250_RSA=y -# CONFIG_LEGACY_PTYS is not set CONFIG_HW_RANDOM=y -CONFIG_I2C=y CONFIG_I2C_CHARDEV=y -CONFIG_I2C_OMAP=y CONFIG_SPI=y CONFIG_SPI_OMAP24XX=y CONFIG_DEBUG_GPIO=y @@ -181,10 +130,6 @@ CONFIG_POWER_SUPPLY=y CONFIG_WATCHDOG=y CONFIG_OMAP_WATCHDOG=y CONFIG_TWL4030_WATCHDOG=y -CONFIG_MENELAUS=y -CONFIG_TWL4030_CORE=y -CONFIG_TWL4030_POWER=y -CONFIG_REGULATOR=y CONFIG_REGULATOR_TWL4030=y CONFIG_REGULATOR_TPS65023=y CONFIG_REGULATOR_TPS6507X=y @@ -208,7 +153,6 @@ CONFIG_BACKLIGHT_LCD_SUPPORT=y CONFIG_LCD_CLASS_DEVICE=y CONFIG_LCD_PLATFORM=y CONFIG_DISPLAY_SUPPORT=y -# CONFIG_VGA_CONSOLE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y CONFIG_FONTS=y @@ -217,25 +161,20 @@ CONFIG_FONT_8x16=y CONFIG_LOGO=y CONFIG_SOUND=m CONFIG_SND=m -CONFIG_SND_MIXER_OSS=y -CONFIG_SND_PCM_OSS=y +CONFIG_SND_MIXER_OSS=m +CONFIG_SND_PCM_OSS=m CONFIG_SND_VERBOSE_PRINTK=y CONFIG_SND_DEBUG=y -CONFIG_SND_USB_AUDIO=y -CONFIG_SND_SOC=y -CONFIG_SND_OMAP_SOC=y -CONFIG_SND_OMAP_SOC_OMAP3_PANDORA=y +CONFIG_SND_USB_AUDIO=m +CONFIG_SND_SOC=m +CONFIG_SND_OMAP_SOC=m +CONFIG_SND_OMAP_SOC_OMAP3_PANDORA=m CONFIG_USB=y CONFIG_USB_DEBUG=y CONFIG_USB_ANNOUNCE_NEW_DEVICES=y CONFIG_USB_DEVICEFS=y CONFIG_USB_SUSPEND=y -# CONFIG_USB_OTG_WHITELIST is not set CONFIG_USB_MON=y -# CONFIG_USB_MUSB_HDRC is not set -# CONFIG_USB_MUSB_OTG is not set -# CONFIG_USB_GADGET_MUSB_HDRC is not set -CONFIG_USB_MUSB_DEBUG=y CONFIG_USB_WDM=y CONFIG_USB_STORAGE=y CONFIG_USB_LIBUSUAL=y @@ -250,18 +189,12 @@ CONFIG_MMC_UNSAFE_RESUME=y CONFIG_SDIO_UART=y CONFIG_MMC_OMAP=y CONFIG_MMC_OMAP_HS=y -CONFIG_LEDS_CLASS=y -CONFIG_LEDS_GPIO=y -CONFIG_LEDS_TRIGGER_TIMER=y -CONFIG_LEDS_TRIGGER_HEARTBEAT=y -CONFIG_LEDS_TRIGGER_DEFAULT_ON=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_TWL92330=y CONFIG_RTC_DRV_TWL4030=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_FS_XATTR is not set -CONFIG_INOTIFY=y CONFIG_QUOTA=y CONFIG_QFMT_V2=y CONFIG_MSDOS_FS=y @@ -285,12 +218,10 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_PRINTK_TIME=y CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_FS=y CONFIG_DEBUG_KERNEL=y CONFIG_SCHEDSTATS=y CONFIG_TIMER_STATS=y CONFIG_PROVE_LOCKING=y -# CONFIG_LOCK_STAT is not set CONFIG_DEBUG_SPINLOCK_SLEEP=y # CONFIG_DEBUG_BUGVERBOSE is not set CONFIG_DEBUG_INFO=y diff --git a/arch/arm/configs/realview-smp_defconfig b/arch/arm/configs/realview-smp_defconfig index 5ca7a61f7c01..abe61bf379d2 100644 --- a/arch/arm/configs/realview-smp_defconfig +++ b/arch/arm/configs/realview-smp_defconfig @@ -38,7 +38,7 @@ CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_INTELEXT=y CONFIG_MTD_CFI_AMDSTD=y -CONFIG_MTD_ARM_INTEGRATOR=y +CONFIG_MTD_PHYSMAP=y CONFIG_ARM_CHARLCD=y CONFIG_NETDEVICES=y CONFIG_SMSC_PHY=y diff --git a/arch/arm/configs/realview_defconfig b/arch/arm/configs/realview_defconfig index fcaa60328051..7079cbe898a8 100644 --- a/arch/arm/configs/realview_defconfig +++ b/arch/arm/configs/realview_defconfig @@ -37,7 +37,7 @@ CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_INTELEXT=y CONFIG_MTD_CFI_AMDSTD=y -CONFIG_MTD_ARM_INTEGRATOR=y +CONFIG_MTD_PHYSMAP=y CONFIG_ARM_CHARLCD=y CONFIG_NETDEVICES=y CONFIG_SMSC_PHY=y diff --git a/arch/arm/configs/s5p6442_defconfig b/arch/arm/configs/s5p6442_defconfig deleted file mode 100644 index 0e92a784af66..000000000000 --- a/arch/arm/configs/s5p6442_defconfig +++ /dev/null @@ -1,65 +0,0 @@ -CONFIG_EXPERIMENTAL=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_BLK_DEV_INITRD=y -CONFIG_KALLSYMS_ALL=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -# CONFIG_BLK_DEV_BSG is not set -CONFIG_ARCH_S5P6442=y -CONFIG_S3C_LOWLEVEL_UART_PORT=1 -CONFIG_MACH_SMDK6442=y -CONFIG_CPU_32v6K=y -CONFIG_AEABI=y -CONFIG_CMDLINE="root=/dev/ram0 rw ramdisk=8192 initrd=0x20800000,8M console=ttySAC1,115200 init=/linuxrc" -CONFIG_FPE_NWFPE=y -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -# CONFIG_PREVENT_FIRMWARE_BUILD is not set -CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=8192 -# CONFIG_MISC_DEVICES is not set -CONFIG_SCSI=y -CONFIG_BLK_DEV_SD=y -CONFIG_CHR_DEV_SG=y -CONFIG_INPUT_EVDEV=y -# CONFIG_INPUT_KEYBOARD is not set -# CONFIG_INPUT_MOUSE is not set -CONFIG_INPUT_TOUCHSCREEN=y -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_NR_UARTS=3 -CONFIG_SERIAL_SAMSUNG=y -CONFIG_SERIAL_SAMSUNG_CONSOLE=y -CONFIG_HW_RANDOM=y -# CONFIG_HWMON is not set -# CONFIG_VGA_CONSOLE is not set -# CONFIG_HID_SUPPORT is not set -# CONFIG_USB_SUPPORT is not set -CONFIG_EXT2_FS=y -CONFIG_INOTIFY=y -CONFIG_MSDOS_FS=y -CONFIG_VFAT_FS=y -CONFIG_TMPFS=y -CONFIG_TMPFS_POSIX_ACL=y -CONFIG_CRAMFS=y -CONFIG_ROMFS_FS=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_BSD_DISKLABEL=y -CONFIG_SOLARIS_X86_PARTITION=y -CONFIG_NLS_CODEPAGE_437=y -CONFIG_NLS_ASCII=y -CONFIG_NLS_ISO8859_1=y -CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_RT_MUTEXES=y -CONFIG_DEBUG_SPINLOCK=y -CONFIG_DEBUG_MUTEXES=y -CONFIG_DEBUG_SPINLOCK_SLEEP=y -CONFIG_DEBUG_INFO=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y -# CONFIG_ARM_UNWIND is not set -CONFIG_DEBUG_USER=y -CONFIG_DEBUG_ERRORS=y -CONFIG_DEBUG_LL=y -CONFIG_DEBUG_S3C_UART=1 -CONFIG_CRC_CCITT=y diff --git a/arch/arm/configs/spear300_defconfig b/arch/arm/configs/spear300_defconfig deleted file mode 100644 index cf29f3e56922..000000000000 --- a/arch/arm/configs/spear300_defconfig +++ /dev/null @@ -1,51 +0,0 @@ -CONFIG_EXPERIMENTAL=y -CONFIG_SYSVIPC=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_BLK_DEV_INITRD=y -CONFIG_KALLSYMS_EXTRA_PASS=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODVERSIONS=y -CONFIG_PLAT_SPEAR=y -CONFIG_BINFMT_MISC=y -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=16384 -CONFIG_INPUT_FF_MEMLESS=y -# CONFIG_INPUT_MOUSEDEV_PSAUX is not set -# CONFIG_INPUT_KEYBOARD is not set -# CONFIG_INPUT_MOUSE is not set -CONFIG_SERIAL_AMBA_PL011=y -CONFIG_SERIAL_AMBA_PL011_CONSOLE=y -# CONFIG_LEGACY_PTYS is not set -# CONFIG_HW_RANDOM is not set -CONFIG_RAW_DRIVER=y -CONFIG_MAX_RAW_DEVS=8192 -CONFIG_GPIO_SYSFS=y -CONFIG_GPIO_PL061=y -# CONFIG_HWMON is not set -# CONFIG_VGA_CONSOLE is not set -# CONFIG_HID_SUPPORT is not set -# CONFIG_USB_SUPPORT is not set -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_SECURITY=y -CONFIG_AUTOFS4_FS=m -CONFIG_MSDOS_FS=m -CONFIG_VFAT_FS=m -CONFIG_FAT_DEFAULT_IOCHARSET="ascii" -CONFIG_TMPFS=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_NLS=y -CONFIG_NLS_DEFAULT="utf8" -CONFIG_NLS_CODEPAGE_437=y -CONFIG_NLS_ASCII=m -CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_SPINLOCK=y -CONFIG_DEBUG_SPINLOCK_SLEEP=y -CONFIG_DEBUG_INFO=y -# CONFIG_CRC32 is not set diff --git a/arch/arm/configs/spear310_defconfig b/arch/arm/configs/spear310_defconfig deleted file mode 100644 index 824e44418b18..000000000000 --- a/arch/arm/configs/spear310_defconfig +++ /dev/null @@ -1,52 +0,0 @@ -CONFIG_EXPERIMENTAL=y -CONFIG_SYSVIPC=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_BLK_DEV_INITRD=y -CONFIG_KALLSYMS_EXTRA_PASS=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODVERSIONS=y -CONFIG_PLAT_SPEAR=y -CONFIG_MACH_SPEAR310=y -CONFIG_BINFMT_MISC=y -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=16384 -CONFIG_INPUT_FF_MEMLESS=y -# CONFIG_INPUT_MOUSEDEV_PSAUX is not set -# CONFIG_INPUT_KEYBOARD is not set -# CONFIG_INPUT_MOUSE is not set -CONFIG_SERIAL_AMBA_PL011=y -CONFIG_SERIAL_AMBA_PL011_CONSOLE=y -# CONFIG_LEGACY_PTYS is not set -# CONFIG_HW_RANDOM is not set -CONFIG_RAW_DRIVER=y -CONFIG_MAX_RAW_DEVS=8192 -CONFIG_GPIO_SYSFS=y -CONFIG_GPIO_PL061=y -# CONFIG_HWMON is not set -# CONFIG_VGA_CONSOLE is not set -# CONFIG_HID_SUPPORT is not set -# CONFIG_USB_SUPPORT is not set -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_SECURITY=y -CONFIG_AUTOFS4_FS=m -CONFIG_MSDOS_FS=m -CONFIG_VFAT_FS=m -CONFIG_FAT_DEFAULT_IOCHARSET="ascii" -CONFIG_TMPFS=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_NLS=y -CONFIG_NLS_DEFAULT="utf8" -CONFIG_NLS_CODEPAGE_437=y -CONFIG_NLS_ASCII=m -CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_SPINLOCK=y -CONFIG_DEBUG_SPINLOCK_SLEEP=y -CONFIG_DEBUG_INFO=y -# CONFIG_CRC32 is not set diff --git a/arch/arm/configs/spear320_defconfig b/arch/arm/configs/spear320_defconfig deleted file mode 100644 index 842f7f3c512a..000000000000 --- a/arch/arm/configs/spear320_defconfig +++ /dev/null @@ -1,52 +0,0 @@ -CONFIG_EXPERIMENTAL=y -CONFIG_SYSVIPC=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_BLK_DEV_INITRD=y -CONFIG_KALLSYMS_EXTRA_PASS=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODVERSIONS=y -CONFIG_PLAT_SPEAR=y -CONFIG_MACH_SPEAR320=y -CONFIG_BINFMT_MISC=y -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=16384 -CONFIG_INPUT_FF_MEMLESS=y -# CONFIG_INPUT_MOUSEDEV_PSAUX is not set -# CONFIG_INPUT_KEYBOARD is not set -# CONFIG_INPUT_MOUSE is not set -CONFIG_SERIAL_AMBA_PL011=y -CONFIG_SERIAL_AMBA_PL011_CONSOLE=y -# CONFIG_LEGACY_PTYS is not set -# CONFIG_HW_RANDOM is not set -CONFIG_RAW_DRIVER=y -CONFIG_MAX_RAW_DEVS=8192 -CONFIG_GPIO_SYSFS=y -CONFIG_GPIO_PL061=y -# CONFIG_HWMON is not set -# CONFIG_VGA_CONSOLE is not set -# CONFIG_HID_SUPPORT is not set -# CONFIG_USB_SUPPORT is not set -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_SECURITY=y -CONFIG_AUTOFS4_FS=m -CONFIG_MSDOS_FS=m -CONFIG_VFAT_FS=m -CONFIG_FAT_DEFAULT_IOCHARSET="ascii" -CONFIG_TMPFS=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_NLS=y -CONFIG_NLS_DEFAULT="utf8" -CONFIG_NLS_CODEPAGE_437=y -CONFIG_NLS_ASCII=m -CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_SPINLOCK=y -CONFIG_DEBUG_SPINLOCK_SLEEP=y -CONFIG_DEBUG_INFO=y -# CONFIG_CRC32 is not set diff --git a/arch/arm/configs/spear3xx_defconfig b/arch/arm/configs/spear3xx_defconfig new file mode 100644 index 000000000000..fea7e1f026a3 --- /dev/null +++ b/arch/arm/configs/spear3xx_defconfig @@ -0,0 +1,53 @@ +CONFIG_EXPERIMENTAL=y +CONFIG_SYSVIPC=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_KALLSYMS_EXTRA_PASS=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODVERSIONS=y +CONFIG_PLAT_SPEAR=y +CONFIG_BOARD_SPEAR300_EVB=y +CONFIG_BOARD_SPEAR310_EVB=y +CONFIG_BOARD_SPEAR320_EVB=y +CONFIG_BINFMT_MISC=y +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=16384 +CONFIG_INPUT_FF_MEMLESS=y +# CONFIG_INPUT_MOUSEDEV_PSAUX is not set +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +# CONFIG_LEGACY_PTYS is not set +# CONFIG_HW_RANDOM is not set +CONFIG_RAW_DRIVER=y +CONFIG_MAX_RAW_DEVS=8192 +CONFIG_GPIO_SYSFS=y +CONFIG_GPIO_PL061=y +# CONFIG_HWMON is not set +# CONFIG_HID_SUPPORT is not set +# CONFIG_USB_SUPPORT is not set +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_SECURITY=y +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_SECURITY=y +CONFIG_AUTOFS4_FS=m +CONFIG_MSDOS_FS=m +CONFIG_VFAT_FS=m +CONFIG_FAT_DEFAULT_IOCHARSET="ascii" +CONFIG_TMPFS=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_NLS=y +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ASCII=m +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_FS=y +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_SPINLOCK=y +CONFIG_DEBUG_SPINLOCK_SLEEP=y +CONFIG_DEBUG_INFO=y +# CONFIG_CRC32 is not set diff --git a/arch/arm/configs/spear600_defconfig b/arch/arm/configs/spear600_defconfig deleted file mode 100644 index 6777c11f63e7..000000000000 --- a/arch/arm/configs/spear600_defconfig +++ /dev/null @@ -1,49 +0,0 @@ -CONFIG_EXPERIMENTAL=y -CONFIG_SYSVIPC=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_BLK_DEV_INITRD=y -CONFIG_KALLSYMS_EXTRA_PASS=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODVERSIONS=y -CONFIG_PLAT_SPEAR=y -CONFIG_ARCH_SPEAR6XX=y -CONFIG_BINFMT_MISC=y -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=16384 -CONFIG_INPUT_FF_MEMLESS=y -# CONFIG_INPUT_MOUSEDEV_PSAUX is not set -CONFIG_SERIAL_AMBA_PL011=y -CONFIG_SERIAL_AMBA_PL011_CONSOLE=y -# CONFIG_LEGACY_PTYS is not set -CONFIG_RAW_DRIVER=y -CONFIG_MAX_RAW_DEVS=8192 -CONFIG_GPIO_SYSFS=y -CONFIG_GPIO_PL061=y -# CONFIG_HWMON is not set -# CONFIG_VGA_CONSOLE is not set -# CONFIG_HID_SUPPORT is not set -# CONFIG_USB_SUPPORT is not set -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_SECURITY=y -CONFIG_AUTOFS4_FS=m -CONFIG_MSDOS_FS=m -CONFIG_VFAT_FS=m -CONFIG_FAT_DEFAULT_IOCHARSET="ascii" -CONFIG_TMPFS=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_NLS=y -CONFIG_NLS_DEFAULT="utf8" -CONFIG_NLS_CODEPAGE_437=y -CONFIG_NLS_ASCII=m -CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_SPINLOCK=y -CONFIG_DEBUG_SPINLOCK_SLEEP=y -CONFIG_DEBUG_INFO=y -# CONFIG_CRC32 is not set diff --git a/arch/arm/configs/spear6xx_defconfig b/arch/arm/configs/spear6xx_defconfig new file mode 100644 index 000000000000..cef2e836afd2 --- /dev/null +++ b/arch/arm/configs/spear6xx_defconfig @@ -0,0 +1,49 @@ +CONFIG_EXPERIMENTAL=y +CONFIG_SYSVIPC=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_KALLSYMS_EXTRA_PASS=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODVERSIONS=y +CONFIG_PLAT_SPEAR=y +CONFIG_ARCH_SPEAR6XX=y +CONFIG_BOARD_SPEAR600_EVB=y +CONFIG_BINFMT_MISC=y +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=16384 +CONFIG_INPUT_FF_MEMLESS=y +# CONFIG_INPUT_MOUSEDEV_PSAUX is not set +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +# CONFIG_LEGACY_PTYS is not set +CONFIG_RAW_DRIVER=y +CONFIG_MAX_RAW_DEVS=8192 +CONFIG_GPIO_SYSFS=y +CONFIG_GPIO_PL061=y +# CONFIG_HWMON is not set +# CONFIG_HID_SUPPORT is not set +# CONFIG_USB_SUPPORT is not set +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_SECURITY=y +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_SECURITY=y +CONFIG_AUTOFS4_FS=m +CONFIG_MSDOS_FS=m +CONFIG_VFAT_FS=m +CONFIG_FAT_DEFAULT_IOCHARSET="ascii" +CONFIG_TMPFS=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_NLS=y +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ASCII=m +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_FS=y +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_SPINLOCK=y +CONFIG_DEBUG_SPINLOCK_SLEEP=y +CONFIG_DEBUG_INFO=y +# CONFIG_CRC32 is not set diff --git a/arch/arm/configs/stmp378x_defconfig b/arch/arm/configs/stmp378x_defconfig deleted file mode 100644 index 1079c2b6eb3a..000000000000 --- a/arch/arm/configs/stmp378x_defconfig +++ /dev/null @@ -1,128 +0,0 @@ -CONFIG_EXPERIMENTAL=y -CONFIG_LOCALVERSION="-default" -CONFIG_SYSVIPC=y -CONFIG_POSIX_MQUEUE=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_BLK_DEV_INITRD=y -CONFIG_EXPERT=y -CONFIG_SLAB=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODULE_FORCE_UNLOAD=y -CONFIG_MODVERSIONS=y -CONFIG_MODULE_SRCVERSION_ALL=y -# CONFIG_BLK_DEV_BSG is not set -CONFIG_ARCH_STMP3XXX=y -CONFIG_ARCH_STMP378X=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_PREEMPT=y -CONFIG_AEABI=y -CONFIG_HIGHMEM=y -CONFIG_ZBOOT_ROM_TEXT=0x0 -CONFIG_ZBOOT_ROM_BSS=0x0 -CONFIG_CMDLINE="console=ttySDBG0,115200 mem=32M" -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_VERBOSE=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -CONFIG_IP_PNP_BOOTP=y -CONFIG_IP_MROUTE=y -CONFIG_IP_PIMSM_V1=y -CONFIG_IP_PIMSM_V2=y -CONFIG_SYN_COOKIES=y -# CONFIG_INET_LRO is not set -# CONFIG_IPV6 is not set -CONFIG_NET_SCHED=y -# CONFIG_WIRELESS is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -# CONFIG_STANDALONE is not set -CONFIG_MTD=y -CONFIG_MTD_CHAR=y -CONFIG_MTD_NAND=y -CONFIG_MTD_UBI=y -CONFIG_MTD_UBI_GLUEBI=y -CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_CRYPTOLOOP=y -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_COUNT=4 -CONFIG_BLK_DEV_RAM_SIZE=6144 -# CONFIG_MISC_DEVICES is not set -CONFIG_SCSI=y -CONFIG_BLK_DEV_SD=y -CONFIG_CHR_DEV_SG=y -# CONFIG_SCSI_LOWLEVEL is not set -CONFIG_INPUT_POLLDEV=y -CONFIG_INPUT_MOUSEDEV_SCREEN_X=320 -CONFIG_INPUT_MOUSEDEV_SCREEN_Y=240 -CONFIG_INPUT_EVDEV=y -# CONFIG_KEYBOARD_ATKBD is not set -# CONFIG_INPUT_MOUSE is not set -CONFIG_INPUT_TOUCHSCREEN=y -CONFIG_INPUT_MISC=y -# CONFIG_SERIO_SERPORT is not set -CONFIG_VT_HW_CONSOLE_BINDING=y -# CONFIG_LEGACY_PTYS is not set -CONFIG_HW_RANDOM=y -CONFIG_DEBUG_GPIO=y -CONFIG_GPIO_SYSFS=y -# CONFIG_HWMON is not set -CONFIG_FB=y -CONFIG_BACKLIGHT_LCD_SUPPORT=y -CONFIG_LCD_CLASS_DEVICE=y -CONFIG_BACKLIGHT_CLASS_DEVICE=y -# CONFIG_VGA_CONSOLE is not set -CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_LOGO=y -# CONFIG_HID_SUPPORT is not set -# CONFIG_USB_SUPPORT is not set -# CONFIG_DNOTIFY is not set -CONFIG_TMPFS=y -CONFIG_CONFIGFS_FS=m -# CONFIG_MISC_FILESYSTEMS is not set -# CONFIG_NETWORK_FILESYSTEMS is not set -# CONFIG_ENABLE_MUST_CHECK is not set -CONFIG_STRIP_ASM_SYMS=y -CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_SHIRQ=y -# CONFIG_SCHED_DEBUG is not set -CONFIG_DEBUG_OBJECTS=y -CONFIG_DEBUG_OBJECTS_SELFTEST=y -CONFIG_DEBUG_OBJECTS_FREE=y -CONFIG_DEBUG_OBJECTS_TIMERS=y -CONFIG_DEBUG_SLAB=y -CONFIG_DEBUG_SLAB_LEAK=y -CONFIG_DEBUG_RT_MUTEXES=y -CONFIG_PROVE_LOCKING=y -CONFIG_DEBUG_SPINLOCK_SLEEP=y -CONFIG_DEBUG_KOBJECT=y -# CONFIG_DEBUG_BUGVERBOSE is not set -CONFIG_DEBUG_INFO=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y -CONFIG_BOOT_TRACER=y -CONFIG_STACK_TRACER=y -CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_KEYS=y -CONFIG_KEYS_DEBUG_PROC_KEYS=y -CONFIG_SECURITY=y -CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_ECB=y -CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_SHA1=m -CONFIG_CRYPTO_AES=m -CONFIG_CRYPTO_DES=y -CONFIG_CRYPTO_DEFLATE=y -CONFIG_CRYPTO_LZO=y -# CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_CCITT=m -CONFIG_CRC16=y diff --git a/arch/arm/configs/stmp37xx_defconfig b/arch/arm/configs/stmp37xx_defconfig deleted file mode 100644 index 564a5cc44085..000000000000 --- a/arch/arm/configs/stmp37xx_defconfig +++ /dev/null @@ -1,108 +0,0 @@ -CONFIG_EXPERIMENTAL=y -CONFIG_LOCALVERSION="-default" -CONFIG_SYSVIPC=y -CONFIG_POSIX_MQUEUE=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_BLK_DEV_INITRD=y -CONFIG_EXPERT=y -CONFIG_SLAB=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODULE_FORCE_UNLOAD=y -CONFIG_MODVERSIONS=y -CONFIG_MODULE_SRCVERSION_ALL=y -# CONFIG_BLK_DEV_BSG is not set -CONFIG_ARCH_STMP3XXX=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_PREEMPT=y -CONFIG_AEABI=y -CONFIG_ZBOOT_ROM_TEXT=0x0 -CONFIG_ZBOOT_ROM_BSS=0x0 -CONFIG_CMDLINE="console=ttySDBG0,115200 mem=32M lcd_panel=lms350 rdinit=/bin/sh ignore_loglevel" -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_VERBOSE=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -CONFIG_IP_PNP_BOOTP=y -CONFIG_IP_MROUTE=y -CONFIG_IP_PIMSM_V1=y -CONFIG_IP_PIMSM_V2=y -CONFIG_SYN_COOKIES=y -# CONFIG_INET_LRO is not set -# CONFIG_IPV6 is not set -CONFIG_NET_SCHED=y -# CONFIG_WIRELESS is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -# CONFIG_STANDALONE is not set -CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_CRYPTOLOOP=y -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_COUNT=4 -CONFIG_BLK_DEV_RAM_SIZE=6144 -# CONFIG_MISC_DEVICES is not set -CONFIG_SCSI=y -CONFIG_BLK_DEV_SD=y -CONFIG_CHR_DEV_SG=y -# CONFIG_SCSI_LOWLEVEL is not set -CONFIG_INPUT_POLLDEV=y -CONFIG_INPUT_MOUSEDEV_SCREEN_X=320 -CONFIG_INPUT_MOUSEDEV_SCREEN_Y=240 -CONFIG_INPUT_EVDEV=y -# CONFIG_KEYBOARD_ATKBD is not set -# CONFIG_INPUT_MOUSE is not set -CONFIG_INPUT_TOUCHSCREEN=y -CONFIG_INPUT_MISC=y -# CONFIG_SERIO_SERPORT is not set -CONFIG_VT_HW_CONSOLE_BINDING=y -# CONFIG_LEGACY_PTYS is not set -CONFIG_HW_RANDOM=y -CONFIG_DEBUG_GPIO=y -CONFIG_GPIO_SYSFS=y -# CONFIG_HWMON is not set -CONFIG_FB=y -CONFIG_BACKLIGHT_LCD_SUPPORT=y -CONFIG_LCD_CLASS_DEVICE=y -CONFIG_BACKLIGHT_CLASS_DEVICE=y -# CONFIG_VGA_CONSOLE is not set -CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_LOGO=y -# CONFIG_HID_SUPPORT is not set -# CONFIG_USB_SUPPORT is not set -# CONFIG_DNOTIFY is not set -CONFIG_TMPFS=y -CONFIG_CONFIGFS_FS=m -# CONFIG_MISC_FILESYSTEMS is not set -# CONFIG_NETWORK_FILESYSTEMS is not set -# CONFIG_ENABLE_MUST_CHECK is not set -CONFIG_DEBUG_KERNEL=y -# CONFIG_DEBUG_BUGVERBOSE is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y -CONFIG_BOOT_TRACER=y -CONFIG_STACK_TRACER=y -CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_DEBUG_LL=y -CONFIG_KEYS=y -CONFIG_KEYS_DEBUG_PROC_KEYS=y -CONFIG_SECURITY=y -CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_ECB=y -CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_SHA1=m -CONFIG_CRYPTO_AES=m -CONFIG_CRYPTO_DES=y -CONFIG_CRYPTO_DEFLATE=y -CONFIG_CRYPTO_LZO=y -# CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_CCITT=m -CONFIG_CRC16=y diff --git a/arch/arm/configs/usb-a9263_defconfig b/arch/arm/configs/usb-a9263_defconfig deleted file mode 100644 index ee82d09249c6..000000000000 --- a/arch/arm/configs/usb-a9263_defconfig +++ /dev/null @@ -1,106 +0,0 @@ -CONFIG_EXPERIMENTAL=y -# CONFIG_LOCALVERSION_AUTO is not set -# CONFIG_SWAP is not set -CONFIG_SYSVIPC=y -CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_SLAB=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -# CONFIG_BLK_DEV_BSG is not set -# CONFIG_IOSCHED_DEADLINE is not set -# CONFIG_IOSCHED_CFQ is not set -CONFIG_ARCH_AT91=y -CONFIG_ARCH_AT91SAM9263=y -CONFIG_MACH_USB_A9263=y -CONFIG_AT91_SLOW_CLOCK=y -# CONFIG_ARM_THUMB is not set -CONFIG_AEABI=y -CONFIG_ZBOOT_ROM_TEXT=0x0 -CONFIG_ZBOOT_ROM_BSS=0x0 -CONFIG_CMDLINE="mem=64M console=ttyS0,115200" -CONFIG_FPE_NWFPE=y -CONFIG_PM=y -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_ROUTE_VERBOSE=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_BOOTP=y -CONFIG_IP_PNP_RARP=y -CONFIG_IP_MROUTE=y -CONFIG_IP_PIMSM_V1=y -CONFIG_IP_PIMSM_V2=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_LRO is not set -# CONFIG_INET_DIAG is not set -# CONFIG_IPV6 is not set -CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -CONFIG_MTD=y -CONFIG_MTD_PARTITIONS=y -CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y -CONFIG_MTD_BLOCK=y -CONFIG_MTD_DATAFLASH=y -CONFIG_MTD_NAND=y -CONFIG_MTD_NAND_ATMEL=y -CONFIG_MTD_NAND_ATMEL_ECC_SOFT=y -CONFIG_BLK_DEV_LOOP=y -# CONFIG_MISC_DEVICES is not set -CONFIG_SCSI=y -CONFIG_BLK_DEV_SD=y -CONFIG_SCSI_MULTI_LUN=y -CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y -CONFIG_MII=y -CONFIG_MACB=y -# CONFIG_INPUT_MOUSEDEV_PSAUX is not set -CONFIG_INPUT_EVDEV=y -CONFIG_INPUT_EVBUG=y -# CONFIG_KEYBOARD_ATKBD is not set -CONFIG_KEYBOARD_GPIO=y -# CONFIG_INPUT_MOUSE is not set -# CONFIG_SERIO is not set -CONFIG_SERIAL_ATMEL=y -CONFIG_SERIAL_ATMEL_CONSOLE=y -CONFIG_HW_RANDOM=y -CONFIG_SPI=y -CONFIG_SPI_ATMEL=y -# CONFIG_HWMON is not set -# CONFIG_VGA_CONSOLE is not set -# CONFIG_USB_HID is not set -CONFIG_USB=y -CONFIG_USB_DEVICEFS=y -CONFIG_USB_MON=y -CONFIG_USB_OHCI_HCD=y -CONFIG_USB_STORAGE=y -CONFIG_USB_GADGET=y -CONFIG_USB_ETH=m -CONFIG_NEW_LEDS=y -CONFIG_LEDS_CLASS=y -CONFIG_LEDS_GPIO=y -CONFIG_LEDS_TRIGGERS=y -CONFIG_LEDS_TRIGGER_HEARTBEAT=y -CONFIG_EXT2_FS=y -CONFIG_INOTIFY=y -CONFIG_FUSE_FS=m -CONFIG_VFAT_FS=y -CONFIG_TMPFS=y -CONFIG_JFFS2_FS=y -CONFIG_NFS_FS=y -CONFIG_NFS_V3=y -CONFIG_NFS_V3_ACL=y -CONFIG_NFS_V4=y -CONFIG_ROOT_NFS=y -CONFIG_NLS_CODEPAGE_437=y -CONFIG_NLS_CODEPAGE_850=y -CONFIG_NLS_ISO8859_1=y -CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_USER=y -CONFIG_DEBUG_LL=y -# CONFIG_CRYPTO_HW is not set diff --git a/arch/arm/configs/versatile_defconfig b/arch/arm/configs/versatile_defconfig index 0ce710f47500..cdd4d2bd3962 100644 --- a/arch/arm/configs/versatile_defconfig +++ b/arch/arm/configs/versatile_defconfig @@ -32,7 +32,7 @@ CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_ADV_OPTIONS=y CONFIG_MTD_CFI_INTELEXT=y -CONFIG_MTD_ARM_INTEGRATOR=y +CONFIG_MTD_PHYSMAP=y CONFIG_BLK_DEV_RAM=y CONFIG_EEPROM_LEGACY=m CONFIG_NETDEVICES=y diff --git a/arch/arm/configs/viper_defconfig b/arch/arm/configs/viper_defconfig index 8b0c717378fa..1d01ddd33122 100644 --- a/arch/arm/configs/viper_defconfig +++ b/arch/arm/configs/viper_defconfig @@ -142,7 +142,7 @@ CONFIG_USB_GADGETFS=m CONFIG_USB_FILE_STORAGE=m CONFIG_USB_G_SERIAL=m CONFIG_USB_G_PRINTER=m -CONFIG_RTC_CLASS=m +CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_DS1307=m CONFIG_RTC_DRV_SA1100=m CONFIG_EXT2_FS=m diff --git a/arch/arm/configs/xcep_defconfig b/arch/arm/configs/xcep_defconfig index 5b5504143647..721832ffe2d7 100644 --- a/arch/arm/configs/xcep_defconfig +++ b/arch/arm/configs/xcep_defconfig @@ -73,7 +73,7 @@ CONFIG_SENSORS_MAX6650=m # CONFIG_VGA_CONSOLE is not set # CONFIG_HID_SUPPORT is not set # CONFIG_USB_SUPPORT is not set -CONFIG_RTC_CLASS=m +CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_SA1100=m CONFIG_DMADEVICES=y # CONFIG_DNOTIFY is not set diff --git a/arch/arm/configs/zeus_defconfig b/arch/arm/configs/zeus_defconfig index 960f65514d88..59577ad3f4ef 100644 --- a/arch/arm/configs/zeus_defconfig +++ b/arch/arm/configs/zeus_defconfig @@ -158,7 +158,7 @@ CONFIG_LEDS_TRIGGER_HEARTBEAT=m CONFIG_LEDS_TRIGGER_BACKLIGHT=m CONFIG_LEDS_TRIGGER_GPIO=m CONFIG_LEDS_TRIGGER_DEFAULT_ON=m -CONFIG_RTC_CLASS=m +CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_ISL1208=m CONFIG_RTC_DRV_PXA=m CONFIG_EXT2_FS=y diff --git a/arch/arm/include/asm/bitops.h b/arch/arm/include/asm/bitops.h index 6b7403fd8f54..b4892a06442c 100644 --- a/arch/arm/include/asm/bitops.h +++ b/arch/arm/include/asm/bitops.h @@ -203,8 +203,6 @@ extern int _find_next_bit_be(const unsigned long *p, int size, int offset); #define find_first_bit(p,sz) _find_first_bit_le(p,sz) #define find_next_bit(p,sz,off) _find_next_bit_le(p,sz,off) -#define WORD_BITOFF_TO_LE(x) ((x)) - #else /* * These are the big endian, atomic definitions. @@ -214,8 +212,6 @@ extern int _find_next_bit_be(const unsigned long *p, int size, int offset); #define find_first_bit(p,sz) _find_first_bit_be(p,sz) #define find_next_bit(p,sz,off) _find_next_bit_be(p,sz,off) -#define WORD_BITOFF_TO_LE(x) ((x) ^ 0x18) - #endif #if __LINUX_ARM_ARCH__ < 5 @@ -287,55 +283,29 @@ static inline int fls(int x) #include #include -static inline void __set_bit_le(int nr, void *addr) -{ - __set_bit(WORD_BITOFF_TO_LE(nr), addr); -} - -static inline void __clear_bit_le(int nr, void *addr) -{ - __clear_bit(WORD_BITOFF_TO_LE(nr), addr); -} - -static inline int __test_and_set_bit_le(int nr, void *addr) -{ - return __test_and_set_bit(WORD_BITOFF_TO_LE(nr), addr); -} - -static inline int test_and_set_bit_le(int nr, void *addr) -{ - return test_and_set_bit(WORD_BITOFF_TO_LE(nr), addr); -} - -static inline int __test_and_clear_bit_le(int nr, void *addr) -{ - return __test_and_clear_bit(WORD_BITOFF_TO_LE(nr), addr); -} - -static inline int test_and_clear_bit_le(int nr, void *addr) -{ - return test_and_clear_bit(WORD_BITOFF_TO_LE(nr), addr); -} - -static inline int test_bit_le(int nr, const void *addr) -{ - return test_bit(WORD_BITOFF_TO_LE(nr), addr); -} +#ifdef __ARMEB__ static inline int find_first_zero_bit_le(const void *p, unsigned size) { return _find_first_zero_bit_le(p, size); } +#define find_first_zero_bit_le find_first_zero_bit_le static inline int find_next_zero_bit_le(const void *p, int size, int offset) { return _find_next_zero_bit_le(p, size, offset); } +#define find_next_zero_bit_le find_next_zero_bit_le static inline int find_next_bit_le(const void *p, int size, int offset) { return _find_next_bit_le(p, size, offset); } +#define find_next_bit_le find_next_bit_le + +#endif + +#include /* * Ext2 is defined to use little-endian byte ordering. diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index ed5bc9e05a4e..cd4458f64171 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h @@ -2,6 +2,7 @@ #define __ASM_ARM_CPUTYPE_H #include +#include #define CPUID_ID 0 #define CPUID_CACHETYPE 1 diff --git a/arch/arm/include/asm/dma.h b/arch/arm/include/asm/dma.h index ca51143f97f1..42005542932b 100644 --- a/arch/arm/include/asm/dma.h +++ b/arch/arm/include/asm/dma.h @@ -6,8 +6,10 @@ /* * This is the maximum virtual address which can be DMA'd from. */ -#ifndef MAX_DMA_ADDRESS +#ifndef ARM_DMA_ZONE_SIZE #define MAX_DMA_ADDRESS 0xffffffff +#else +#define MAX_DMA_ADDRESS (PAGE_OFFSET + ARM_DMA_ZONE_SIZE) #endif #ifdef CONFIG_ISA_DMA_API diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h index c3cd8755e648..0e9ce8d9686e 100644 --- a/arch/arm/include/asm/elf.h +++ b/arch/arm/include/asm/elf.h @@ -108,6 +108,7 @@ struct task_struct; int dump_task_regs(struct task_struct *t, elf_gregset_t *elfregs); #define ELF_CORE_COPY_TASK_REGS dump_task_regs +#define CORE_DUMP_USE_REGSET #define ELF_EXEC_PAGESIZE 4096 /* This is the location that an ET_DYN program is loaded if exec'ed. Typical diff --git a/arch/arm/include/asm/fiq.h b/arch/arm/include/asm/fiq.h index 2242ce22ec6c..d493d0b742a1 100644 --- a/arch/arm/include/asm/fiq.h +++ b/arch/arm/include/asm/fiq.h @@ -4,6 +4,13 @@ * Support for FIQ on ARM architectures. * Written by Philip Blundell , 1998 * Re-written by Russell King + * + * NOTE: The FIQ mode registers are not magically preserved across + * suspend/resume. + * + * Drivers which require these registers to be preserved across power + * management operations must implement appropriate suspend/resume handlers to + * save and restore them. */ #ifndef __ASM_FIQ_H @@ -29,9 +36,21 @@ struct fiq_handler { extern int claim_fiq(struct fiq_handler *f); extern void release_fiq(struct fiq_handler *f); extern void set_fiq_handler(void *start, unsigned int length); -extern void set_fiq_regs(struct pt_regs *regs); -extern void get_fiq_regs(struct pt_regs *regs); extern void enable_fiq(int fiq); extern void disable_fiq(int fiq); +/* helpers defined in fiqasm.S: */ +extern void __set_fiq_regs(unsigned long const *regs); +extern void __get_fiq_regs(unsigned long *regs); + +static inline void set_fiq_regs(struct pt_regs const *regs) +{ + __set_fiq_regs(®s->ARM_r8); +} + +static inline void get_fiq_regs(struct pt_regs *regs) +{ + __get_fiq_regs(®s->ARM_r8); +} + #endif diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h index 199a6b6de7f4..8c73900da9ed 100644 --- a/arch/arm/include/asm/futex.h +++ b/arch/arm/include/asm/futex.h @@ -3,16 +3,74 @@ #ifdef __KERNEL__ +#if defined(CONFIG_CPU_USE_DOMAINS) && defined(CONFIG_SMP) +/* ARM doesn't provide unprivileged exclusive memory accessors */ +#include +#else + +#include +#include +#include + +#define __futex_atomic_ex_table(err_reg) \ + "3:\n" \ + " .pushsection __ex_table,\"a\"\n" \ + " .align 3\n" \ + " .long 1b, 4f, 2b, 4f\n" \ + " .popsection\n" \ + " .pushsection .fixup,\"ax\"\n" \ + "4: mov %0, " err_reg "\n" \ + " b 3b\n" \ + " .popsection" + #ifdef CONFIG_SMP -#include +#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \ + smp_mb(); \ + __asm__ __volatile__( \ + "1: ldrex %1, [%2]\n" \ + " " insn "\n" \ + "2: strex %1, %0, [%2]\n" \ + " teq %1, #0\n" \ + " bne 1b\n" \ + " mov %0, #0\n" \ + __futex_atomic_ex_table("%4") \ + : "=&r" (ret), "=&r" (oldval) \ + : "r" (uaddr), "r" (oparg), "Ir" (-EFAULT) \ + : "cc", "memory") + +static inline int +futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) +{ + int ret; + u32 val; + + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) + return -EFAULT; + + smp_mb(); + __asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n" + "1: ldrex %1, [%4]\n" + " teq %1, %2\n" + " ite eq @ explicit IT needed for the 2b label\n" + "2: strexeq %0, %3, [%4]\n" + " movne %0, #0\n" + " teq %0, #0\n" + " bne 1b\n" + __futex_atomic_ex_table("%5") + : "=&r" (ret), "=&r" (val) + : "r" (oldval), "r" (newval), "r" (uaddr), "Ir" (-EFAULT) + : "cc", "memory"); + smp_mb(); + + *uval = val; + return ret; +} #else /* !SMP, we can work around lack of atomic ops by disabling preemption */ -#include #include -#include -#include #include #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \ @@ -21,19 +79,37 @@ " " insn "\n" \ "2: " T(str) " %0, [%2]\n" \ " mov %0, #0\n" \ - "3:\n" \ - " .pushsection __ex_table,\"a\"\n" \ - " .align 3\n" \ - " .long 1b, 4f, 2b, 4f\n" \ - " .popsection\n" \ - " .pushsection .fixup,\"ax\"\n" \ - "4: mov %0, %4\n" \ - " b 3b\n" \ - " .popsection" \ + __futex_atomic_ex_table("%4") \ : "=&r" (ret), "=&r" (oldval) \ : "r" (uaddr), "r" (oparg), "Ir" (-EFAULT) \ : "cc", "memory") +static inline int +futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) +{ + int ret = 0; + u32 val; + + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) + return -EFAULT; + + __asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n" + "1: " T(ldr) " %1, [%4]\n" + " teq %1, %2\n" + " it eq @ explicit IT needed for the 2b label\n" + "2: " T(streq) " %3, [%4]\n" + __futex_atomic_ex_table("%5") + : "+r" (ret), "=&r" (val) + : "r" (oldval), "r" (newval), "r" (uaddr), "Ir" (-EFAULT) + : "cc", "memory"); + + *uval = val; + return ret; +} + +#endif /* !SMP */ + static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) { @@ -87,39 +163,6 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) return ret; } -static inline int -futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, - u32 oldval, u32 newval) -{ - int ret = 0; - u32 val; - - if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) - return -EFAULT; - - __asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n" - "1: " T(ldr) " %1, [%4]\n" - " teq %1, %2\n" - " it eq @ explicit IT needed for the 2b label\n" - "2: " T(streq) " %3, [%4]\n" - "3:\n" - " .pushsection __ex_table,\"a\"\n" - " .align 3\n" - " .long 1b, 4f, 2b, 4f\n" - " .popsection\n" - " .pushsection .fixup,\"ax\"\n" - "4: mov %0, %5\n" - " b 3b\n" - " .popsection" - : "+r" (ret), "=&r" (val) - : "r" (oldval), "r" (newval), "r" (uaddr), "Ir" (-EFAULT) - : "cc", "memory"); - - *uval = val; - return ret; -} - -#endif /* !SMP */ - +#endif /* !(CPU_USE_DOMAINS && SMP) */ #endif /* __KERNEL__ */ #endif /* _ASM_ARM_FUTEX_H */ diff --git a/arch/arm/include/asm/hardware/timer-sp.h b/arch/arm/include/asm/hardware/timer-sp.h index 21e75e30d497..4384d81eee79 100644 --- a/arch/arm/include/asm/hardware/timer-sp.h +++ b/arch/arm/include/asm/hardware/timer-sp.h @@ -1,2 +1,2 @@ -void sp804_clocksource_init(void __iomem *); -void sp804_clockevents_init(void __iomem *, unsigned int); +void sp804_clocksource_init(void __iomem *, const char *); +void sp804_clockevents_init(void __iomem *, unsigned int, const char *); diff --git a/arch/arm/include/asm/i8253.h b/arch/arm/include/asm/i8253.h new file mode 100644 index 000000000000..70656b69d5ce --- /dev/null +++ b/arch/arm/include/asm/i8253.h @@ -0,0 +1,15 @@ +#ifndef __ASMARM_I8253_H +#define __ASMARM_I8253_H + +/* i8253A PIT registers */ +#define PIT_MODE 0x43 +#define PIT_CH0 0x40 + +#define PIT_LATCH ((PIT_TICK_RATE + HZ / 2) / HZ) + +extern raw_spinlock_t i8253_lock; + +#define outb_pit outb_p +#define inb_pit inb_p + +#endif diff --git a/arch/arm/include/asm/kprobes.h b/arch/arm/include/asm/kprobes.h index bb8a19bd5822..e46bdd0097eb 100644 --- a/arch/arm/include/asm/kprobes.h +++ b/arch/arm/include/asm/kprobes.h @@ -39,10 +39,13 @@ typedef u32 kprobe_opcode_t; struct kprobe; typedef void (kprobe_insn_handler_t)(struct kprobe *, struct pt_regs *); +typedef unsigned long (kprobe_check_cc)(unsigned long); + /* Architecture specific copy of original instruction. */ struct arch_specific_insn { kprobe_opcode_t *insn; kprobe_insn_handler_t *insn_handler; + kprobe_check_cc *insn_check_cc; }; struct prev_kprobe { diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h index bf13b814c1b8..946f4d778f71 100644 --- a/arch/arm/include/asm/mach/arch.h +++ b/arch/arm/include/asm/mach/arch.h @@ -18,6 +18,8 @@ struct machine_desc { unsigned int nr; /* architecture number */ const char *name; /* architecture name */ unsigned long boot_params; /* tagged list */ + const char **dt_compat; /* array of device tree + * 'compatible' strings */ unsigned int nr_irqs; /* number of IRQs */ @@ -47,6 +49,13 @@ struct machine_desc { */ extern struct machine_desc *machine_desc; +/* + * Machine type table - also only accessible during boot + */ +extern struct machine_desc __arch_info_begin[], __arch_info_end[]; +#define for_each_machine_desc(p) \ + for (p = __arch_info_begin; p < __arch_info_end; p++) + /* * Set of macros to define architecture features. This is built into * a table by the linker. diff --git a/arch/arm/include/asm/mach/time.h b/arch/arm/include/asm/mach/time.h index 883f6be5117a..d5adaae5ee2c 100644 --- a/arch/arm/include/asm/mach/time.h +++ b/arch/arm/include/asm/mach/time.h @@ -34,7 +34,6 @@ * timer interrupt which may be pending. */ struct sys_timer { - struct sys_device dev; void (*init)(void); void (*suspend)(void); void (*resume)(void); diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 431077c5a867..af44a8fb3480 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -209,14 +209,10 @@ static inline unsigned long __phys_to_virt(unsigned long x) * allocations. This must be the smallest DMA mask in the system, * so a successful GFP_DMA allocation will always satisfy this. */ -#ifndef ISA_DMA_THRESHOLD +#ifndef ARM_DMA_ZONE_SIZE #define ISA_DMA_THRESHOLD (0xffffffffULL) -#endif - -#ifndef arch_adjust_zones -#define arch_adjust_zones(size,holes) do { } while (0) -#elif !defined(CONFIG_ZONE_DMA) -#error "custom arch_adjust_zones() requires CONFIG_ZONE_DMA" +#else +#define ISA_DMA_THRESHOLD (PHYS_OFFSET + ARM_DMA_ZONE_SIZE - 1) #endif /* diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h index f51a69595f6e..ac75d0848889 100644 --- a/arch/arm/include/asm/page.h +++ b/arch/arm/include/asm/page.h @@ -197,7 +197,7 @@ typedef unsigned long pgprot_t; typedef struct page *pgtable_t; -#ifndef CONFIG_SPARSEMEM +#ifdef CONFIG_HAVE_ARCH_PFN_VALID extern int pfn_valid(unsigned long); #endif diff --git a/arch/arm/include/asm/prom.h b/arch/arm/include/asm/prom.h new file mode 100644 index 000000000000..11b8708fc4db --- /dev/null +++ b/arch/arm/include/asm/prom.h @@ -0,0 +1,37 @@ +/* + * arch/arm/include/asm/prom.h + * + * Copyright (C) 2009 Canonical Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#ifndef __ASMARM_PROM_H +#define __ASMARM_PROM_H + +#ifdef CONFIG_OF + +#include +#include + +static inline void irq_dispose_mapping(unsigned int virq) +{ + return; +} + +extern struct machine_desc *setup_machine_fdt(unsigned int dt_phys); +extern void arm_dt_memblock_reserve(void); + +#else /* CONFIG_OF */ + +static inline struct machine_desc *setup_machine_fdt(unsigned int dt_phys) +{ + return NULL; +} + +static inline void arm_dt_memblock_reserve(void) { } + +#endif /* CONFIG_OF */ +#endif /* ASMARM_PROM_H */ diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h index a8ff22b2a391..312d10877bd7 100644 --- a/arch/arm/include/asm/ptrace.h +++ b/arch/arm/include/asm/ptrace.h @@ -128,6 +128,12 @@ struct pt_regs { #define ARM_r0 uregs[0] #define ARM_ORIG_r0 uregs[17] +/* + * The size of the user-visible VFP state as seen by PTRACE_GET/SETVFPREGS + * and core dumps. + */ +#define ARM_VFPREGS_SIZE ( 32 * 8 /*fpregs*/ + 4 /*fpscr*/ ) + #ifdef __KERNEL__ #define user_mode(regs) \ diff --git a/arch/arm/include/asm/setup.h b/arch/arm/include/asm/setup.h index 95176af3df8c..ee2ad8ae07af 100644 --- a/arch/arm/include/asm/setup.h +++ b/arch/arm/include/asm/setup.h @@ -217,6 +217,10 @@ extern struct meminfo meminfo; #define bank_phys_end(bank) ((bank)->start + (bank)->size) #define bank_phys_size(bank) (bank)->size +extern int arm_add_memory(phys_addr_t start, unsigned long size); +extern void early_print(const char *str, ...); +extern void dump_machine_table(void); + #endif /* __KERNEL__ */ #endif diff --git a/arch/arm/include/asm/sizes.h b/arch/arm/include/asm/sizes.h index 316bb2b2be3d..154b89b81d3e 100644 --- a/arch/arm/include/asm/sizes.h +++ b/arch/arm/include/asm/sizes.h @@ -16,44 +16,6 @@ /* Size definitions * Copyright (C) ARM Limited 1998. All rights reserved. */ +#include -#ifndef __sizes_h -#define __sizes_h 1 - -/* handy sizes */ -#define SZ_16 0x00000010 -#define SZ_32 0x00000020 -#define SZ_64 0x00000040 -#define SZ_128 0x00000080 -#define SZ_256 0x00000100 -#define SZ_512 0x00000200 - -#define SZ_1K 0x00000400 -#define SZ_2K 0x00000800 -#define SZ_4K 0x00001000 -#define SZ_8K 0x00002000 -#define SZ_16K 0x00004000 -#define SZ_32K 0x00008000 -#define SZ_64K 0x00010000 -#define SZ_128K 0x00020000 -#define SZ_256K 0x00040000 -#define SZ_512K 0x00080000 - -#define SZ_1M 0x00100000 -#define SZ_2M 0x00200000 -#define SZ_4M 0x00400000 -#define SZ_8M 0x00800000 -#define SZ_16M 0x01000000 -#define SZ_32M 0x02000000 -#define SZ_48M 0x03000000 -#define SZ_64M 0x04000000 -#define SZ_128M 0x08000000 -#define SZ_256M 0x10000000 -#define SZ_512M 0x20000000 - -#define SZ_1G 0x40000000 -#define SZ_2G 0x80000000 - -#endif - -/* END */ +#define SZ_48M (SZ_32M + SZ_16M) diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h index 96ed521f2408..e42d96a45d3e 100644 --- a/arch/arm/include/asm/smp.h +++ b/arch/arm/include/asm/smp.h @@ -14,20 +14,12 @@ #include #include -#include - #ifndef CONFIG_SMP # error " included in non-SMP build" #endif #define raw_smp_processor_id() (current_thread_info()->cpu) -/* - * at the moment, there's not a big penalty for changing CPUs - * (the >big< penalty is running SMP in the first place) - */ -#define PROC_CHANGE_PENALTY 15 - struct seq_file; /* @@ -47,9 +39,9 @@ extern void smp_init_cpus(void); /* - * Raise an IPI cross call on CPUs in callmap. + * Provide a function to raise an IPI cross call on CPUs in callmap. */ -extern void smp_cross_call(const struct cpumask *mask, int ipi); +extern void set_smp_cross_call(void (*)(const struct cpumask *, unsigned int)); /* * Boot a secondary CPU, and assign it the specified idle task. @@ -78,6 +70,7 @@ extern void platform_smp_prepare_cpus(unsigned int); */ struct secondary_data { unsigned long pgdir; + unsigned long swapper_pg_dir; void *stack; }; extern struct secondary_data secondary_data; diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index fdd3820edff8..65fa3c88095c 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -5,6 +5,8 @@ #error SMP not supported on pre-ARMv6 CPUs #endif +#include + /* * sev and wfe are ARMv6K extensions. Uniprocessor ARMv6 may not have the K * extensions, so when running on UP, we have to patch these instructions away. diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h index 885be097769d..832888d0c20c 100644 --- a/arch/arm/include/asm/system.h +++ b/arch/arm/include/asm/system.h @@ -159,7 +159,7 @@ extern unsigned int user_debug; #include #elif defined(CONFIG_ARM_DMA_MEM_BUFFERABLE) || defined(CONFIG_SMP) #define mb() do { dsb(); outer_sync(); } while (0) -#define rmb() dmb() +#define rmb() dsb() #define wmb() mb() #else #include diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index 82dfe5d0c41e..265f908c4a6e 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -41,12 +41,12 @@ */ #if defined(CONFIG_SMP) || defined(CONFIG_CPU_32v7) #define tlb_fast_mode(tlb) 0 -#define FREE_PTE_NR 500 #else #define tlb_fast_mode(tlb) 1 -#define FREE_PTE_NR 0 #endif +#define MMU_GATHER_BUNDLE 8 + /* * TLB handling. This allows us to remove pages from the page * tables, and efficiently handle the TLB issues. @@ -58,7 +58,9 @@ struct mmu_gather { unsigned long range_start; unsigned long range_end; unsigned int nr; - struct page *pages[FREE_PTE_NR]; + unsigned int max; + struct page **pages; + struct page *local[MMU_GATHER_BUNDLE]; }; DECLARE_PER_CPU(struct mmu_gather, mmu_gathers); @@ -97,26 +99,37 @@ static inline void tlb_add_flush(struct mmu_gather *tlb, unsigned long addr) } } +static inline void __tlb_alloc_page(struct mmu_gather *tlb) +{ + unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0); + + if (addr) { + tlb->pages = (void *)addr; + tlb->max = PAGE_SIZE / sizeof(struct page *); + } +} + static inline void tlb_flush_mmu(struct mmu_gather *tlb) { tlb_flush(tlb); if (!tlb_fast_mode(tlb)) { free_pages_and_swap_cache(tlb->pages, tlb->nr); tlb->nr = 0; + if (tlb->pages == tlb->local) + __tlb_alloc_page(tlb); } } -static inline struct mmu_gather * -tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush) +static inline void +tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int fullmm) { - struct mmu_gather *tlb = &get_cpu_var(mmu_gathers); - tlb->mm = mm; - tlb->fullmm = full_mm_flush; + tlb->fullmm = fullmm; tlb->vma = NULL; + tlb->max = ARRAY_SIZE(tlb->local); + tlb->pages = tlb->local; tlb->nr = 0; - - return tlb; + __tlb_alloc_page(tlb); } static inline void @@ -127,7 +140,8 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) /* keep the page table cache within bounds */ check_pgt_cache(); - put_cpu_var(mmu_gathers); + if (tlb->pages != tlb->local) + free_pages((unsigned long)tlb->pages, 0); } /* @@ -162,15 +176,22 @@ tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) tlb_flush(tlb); } -static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) +static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page) { if (tlb_fast_mode(tlb)) { free_page_and_swap_cache(page); - } else { - tlb->pages[tlb->nr++] = page; - if (tlb->nr >= FREE_PTE_NR) - tlb_flush_mmu(tlb); + return 1; /* avoid calling tlb_flush_mmu */ } + + tlb->pages[tlb->nr++] = page; + VM_BUG_ON(tlb->nr > tlb->max); + return tlb->max - tlb->nr; +} + +static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) +{ + if (!__tlb_remove_page(tlb, page)) + tlb_flush_mmu(tlb); } static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index c891eb76c0e3..2c04ed5efeb5 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -396,6 +396,12 @@ #define __NR_fanotify_init (__NR_SYSCALL_BASE+367) #define __NR_fanotify_mark (__NR_SYSCALL_BASE+368) #define __NR_prlimit64 (__NR_SYSCALL_BASE+369) +#define __NR_name_to_handle_at (__NR_SYSCALL_BASE+370) +#define __NR_open_by_handle_at (__NR_SYSCALL_BASE+371) +#define __NR_clock_adjtime (__NR_SYSCALL_BASE+372) +#define __NR_syncfs (__NR_SYSCALL_BASE+373) +#define __NR_sendmmsg (__NR_SYSCALL_BASE+374) +#define __NR_setns (__NR_SYSCALL_BASE+375) /* * The following SWIs are ARM private. diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 8d95446150a3..a5b31af5c2b8 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -24,7 +24,7 @@ obj-$(CONFIG_OC_ETM) += etm.o obj-$(CONFIG_ISA_DMA_API) += dma.o obj-$(CONFIG_ARCH_ACORN) += ecard.o -obj-$(CONFIG_FIQ) += fiq.o +obj-$(CONFIG_FIQ) += fiq.o fiqasm.o obj-$(CONFIG_MODULES) += armksyms.o module.o obj-$(CONFIG_ARTHUR) += arthur.o obj-$(CONFIG_ISA_DMA) += dma-isa.o @@ -44,6 +44,7 @@ obj-$(CONFIG_ARM_THUMBEE) += thumbee.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_ARM_UNWIND) += unwind.o obj-$(CONFIG_HAVE_TCM) += tcm.o +obj-$(CONFIG_OF) += devtree.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_SWP_EMULATE) += swp_emulate.o CFLAGS_swp_emulate.o := -Wa,-march=armv7-a diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index 5c26eccef998..80f7896cc016 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S @@ -379,6 +379,12 @@ CALL(sys_fanotify_init) CALL(sys_fanotify_mark) CALL(sys_prlimit64) +/* 370 */ CALL(sys_name_to_handle_at) + CALL(sys_open_by_handle_at) + CALL(sys_clock_adjtime) + CALL(sys_syncfs) + CALL(sys_sendmmsg) +/* 375 */ CALL(sys_setns) #ifndef syscalls_counted .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls #define syscalls_counted diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c new file mode 100644 index 000000000000..0cdd7b456cb2 --- /dev/null +++ b/arch/arm/kernel/devtree.c @@ -0,0 +1,148 @@ +/* + * linux/arch/arm/kernel/devtree.c + * + * Copyright (C) 2009 Canonical Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +void __init early_init_dt_add_memory_arch(u64 base, u64 size) +{ + arm_add_memory(base, size); +} + +void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align) +{ + return alloc_bootmem_align(size, align); +} + +void __init arm_dt_memblock_reserve(void) +{ + u64 *reserve_map, base, size; + + if (!initial_boot_params) + return; + + /* Reserve the dtb region */ + memblock_reserve(virt_to_phys(initial_boot_params), + be32_to_cpu(initial_boot_params->totalsize)); + + /* + * Process the reserve map. This will probably overlap the initrd + * and dtb locations which are already reserved, but overlaping + * doesn't hurt anything + */ + reserve_map = ((void*)initial_boot_params) + + be32_to_cpu(initial_boot_params->off_mem_rsvmap); + while (1) { + base = be64_to_cpup(reserve_map++); + size = be64_to_cpup(reserve_map++); + if (!size) + break; + memblock_reserve(base, size); + } +} + +/** + * setup_machine_fdt - Machine setup when an dtb was passed to the kernel + * @dt_phys: physical address of dt blob + * + * If a dtb was passed to the kernel in r2, then use it to choose the + * correct machine_desc and to setup the system. + */ +struct machine_desc * __init setup_machine_fdt(unsigned int dt_phys) +{ + struct boot_param_header *devtree; + struct machine_desc *mdesc, *mdesc_best = NULL; + unsigned int score, mdesc_score = ~1; + unsigned long dt_root; + const char *model; + + if (!dt_phys) + return NULL; + + devtree = phys_to_virt(dt_phys); + + /* check device tree validity */ + if (be32_to_cpu(devtree->magic) != OF_DT_HEADER) + return NULL; + + /* Search the mdescs for the 'best' compatible value match */ + initial_boot_params = devtree; + dt_root = of_get_flat_dt_root(); + for_each_machine_desc(mdesc) { + score = of_flat_dt_match(dt_root, mdesc->dt_compat); + if (score > 0 && score < mdesc_score) { + mdesc_best = mdesc; + mdesc_score = score; + } + } + if (!mdesc_best) { + const char *prop; + long size; + + early_print("\nError: unrecognized/unsupported " + "device tree compatible list:\n[ "); + + prop = of_get_flat_dt_prop(dt_root, "compatible", &size); + while (size > 0) { + early_print("'%s' ", prop); + size -= strlen(prop) + 1; + prop += strlen(prop) + 1; + } + early_print("]\n\n"); + + dump_machine_table(); /* does not return */ + } + + model = of_get_flat_dt_prop(dt_root, "model", NULL); + if (!model) + model = of_get_flat_dt_prop(dt_root, "compatible", NULL); + if (!model) + model = ""; + pr_info("Machine: %s, model: %s\n", mdesc_best->name, model); + + /* Retrieve various information from the /chosen node */ + of_scan_flat_dt(early_init_dt_scan_chosen, boot_command_line); + /* Initialize {size,address}-cells info */ + of_scan_flat_dt(early_init_dt_scan_root, NULL); + /* Setup memory, calling early_init_dt_add_memory_arch */ + of_scan_flat_dt(early_init_dt_scan_memory, NULL); + + /* Change machine number to match the mdesc we're using */ + __machine_arch_type = mdesc_best->nr; + + return mdesc_best; +} + +/** + * irq_create_of_mapping - Hook to resolve OF irq specifier into a Linux irq# + * + * Currently the mapping mechanism is trivial; simple flat hwirq numbers are + * mapped 1:1 onto Linux irq numbers. Cascaded irq controllers are not + * supported. + */ +unsigned int irq_create_of_mapping(struct device_node *controller, + const u32 *intspec, unsigned int intsize) +{ + return intspec[0]; +} +EXPORT_SYMBOL_GPL(irq_create_of_mapping); diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index e8d885676807..90c62cd51ca9 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -435,6 +435,10 @@ __irq_usr: usr_entry kuser_cmpxchg_check +#ifdef CONFIG_IRQSOFF_TRACER + bl trace_hardirqs_off +#endif + get_thread_info tsk #ifdef CONFIG_PREEMPT ldr r8, [tsk, #TI_PREEMPT] @ get preempt count @@ -453,7 +457,7 @@ __irq_usr: #endif mov why, #0 - b ret_to_user + b ret_to_user_from_irq UNWIND(.fnend ) ENDPROC(__irq_usr) diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 1e7b04a40a31..b2a27b6b0046 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -64,6 +64,7 @@ work_resched: ENTRY(ret_to_user) ret_slow_syscall: disable_irq @ disable interrupts +ENTRY(ret_to_user_from_irq) ldr r1, [tsk, #TI_FLAGS] tst r1, #_TIF_WORK_MASK bne work_pending @@ -75,6 +76,7 @@ no_work_pending: arch_ret_to_user r1, lr restore_user_regs fast = 0, offset = 0 +ENDPROC(ret_to_user_from_irq) ENDPROC(ret_to_user) /* diff --git a/arch/arm/kernel/fiq.c b/arch/arm/kernel/fiq.c index e72dc34eea1c..4c164ece5891 100644 --- a/arch/arm/kernel/fiq.c +++ b/arch/arm/kernel/fiq.c @@ -89,47 +89,6 @@ void set_fiq_handler(void *start, unsigned int length) flush_icache_range(0x1c, 0x1c + length); } -/* - * Taking an interrupt in FIQ mode is death, so both these functions - * disable irqs for the duration. Note - these functions are almost - * entirely coded in assembly. - */ -void __naked set_fiq_regs(struct pt_regs *regs) -{ - register unsigned long tmp; - asm volatile ( - "mov ip, sp\n\ - stmfd sp!, {fp, ip, lr, pc}\n\ - sub fp, ip, #4\n\ - mrs %0, cpsr\n\ - msr cpsr_c, %2 @ select FIQ mode\n\ - mov r0, r0\n\ - ldmia %1, {r8 - r14}\n\ - msr cpsr_c, %0 @ return to SVC mode\n\ - mov r0, r0\n\ - ldmfd sp, {fp, sp, pc}" - : "=&r" (tmp) - : "r" (®s->ARM_r8), "I" (PSR_I_BIT | PSR_F_BIT | FIQ_MODE)); -} - -void __naked get_fiq_regs(struct pt_regs *regs) -{ - register unsigned long tmp; - asm volatile ( - "mov ip, sp\n\ - stmfd sp!, {fp, ip, lr, pc}\n\ - sub fp, ip, #4\n\ - mrs %0, cpsr\n\ - msr cpsr_c, %2 @ select FIQ mode\n\ - mov r0, r0\n\ - stmia %1, {r8 - r14}\n\ - msr cpsr_c, %0 @ return to SVC mode\n\ - mov r0, r0\n\ - ldmfd sp, {fp, sp, pc}" - : "=&r" (tmp) - : "r" (®s->ARM_r8), "I" (PSR_I_BIT | PSR_F_BIT | FIQ_MODE)); -} - int claim_fiq(struct fiq_handler *f) { int ret = 0; @@ -174,8 +133,8 @@ void disable_fiq(int fiq) } EXPORT_SYMBOL(set_fiq_handler); -EXPORT_SYMBOL(set_fiq_regs); -EXPORT_SYMBOL(get_fiq_regs); +EXPORT_SYMBOL(__set_fiq_regs); /* defined in fiqasm.S */ +EXPORT_SYMBOL(__get_fiq_regs); /* defined in fiqasm.S */ EXPORT_SYMBOL(claim_fiq); EXPORT_SYMBOL(release_fiq); EXPORT_SYMBOL(enable_fiq); diff --git a/arch/arm/kernel/fiqasm.S b/arch/arm/kernel/fiqasm.S new file mode 100644 index 000000000000..207f9d652010 --- /dev/null +++ b/arch/arm/kernel/fiqasm.S @@ -0,0 +1,49 @@ +/* + * linux/arch/arm/kernel/fiqasm.S + * + * Derived from code originally in linux/arch/arm/kernel/fiq.c: + * + * Copyright (C) 1998 Russell King + * Copyright (C) 1998, 1999 Phil Blundell + * Copyright (C) 2011, Linaro Limited + * + * FIQ support written by Philip Blundell , 1998. + * + * FIQ support re-written by Russell King to be more generic + * + * v7/Thumb-2 compatibility modifications by Linaro Limited, 2011. + */ + +#include +#include + +/* + * Taking an interrupt in FIQ mode is death, so both these functions + * disable irqs for the duration. + */ + +ENTRY(__set_fiq_regs) + mov r2, #PSR_I_BIT | PSR_F_BIT | FIQ_MODE + mrs r1, cpsr + msr cpsr_c, r2 @ select FIQ mode + mov r0, r0 @ avoid hazard prior to ARMv4 + ldmia r0!, {r8 - r12} + ldr sp, [r0], #4 + ldr lr, [r0] + msr cpsr_c, r1 @ return to SVC mode + mov r0, r0 @ avoid hazard prior to ARMv4 + mov pc, lr +ENDPROC(__set_fiq_regs) + +ENTRY(__get_fiq_regs) + mov r2, #PSR_I_BIT | PSR_F_BIT | FIQ_MODE + mrs r1, cpsr + msr cpsr_c, r2 @ select FIQ mode + mov r0, r0 @ avoid hazard prior to ARMv4 + stmia r0!, {r8 - r12} + str sp, [r0], #4 + str lr, [r0] + msr cpsr_c, r1 @ return to SVC mode + mov r0, r0 @ avoid hazard prior to ARMv4 + mov pc, lr +ENDPROC(__get_fiq_regs) diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S index c84b57d27d07..854bd22380d3 100644 --- a/arch/arm/kernel/head-common.S +++ b/arch/arm/kernel/head-common.S @@ -15,6 +15,12 @@ #define ATAG_CORE_SIZE ((2*4 + 3*4) >> 2) #define ATAG_CORE_SIZE_EMPTY ((2*4) >> 2) +#ifdef CONFIG_CPU_BIG_ENDIAN +#define OF_DT_MAGIC 0xd00dfeed +#else +#define OF_DT_MAGIC 0xedfe0dd0 /* 0xd00dfeed in big-endian */ +#endif + /* * Exception handling. Something went wrong and we can't proceed. We * ought to tell the user, but since we don't have any guarantee that @@ -28,20 +34,26 @@ /* Determine validity of the r2 atags pointer. The heuristic requires * that the pointer be aligned, in the first 16k of physical RAM and - * that the ATAG_CORE marker is first and present. Future revisions + * that the ATAG_CORE marker is first and present. If CONFIG_OF_FLATTREE + * is selected, then it will also accept a dtb pointer. Future revisions * of this function may be more lenient with the physical address and * may also be able to move the ATAGS block if necessary. * * Returns: - * r2 either valid atags pointer, or zero + * r2 either valid atags pointer, valid dtb pointer, or zero * r5, r6 corrupted */ __vet_atags: tst r2, #0x3 @ aligned? bne 1f - ldr r5, [r2, #0] @ is first tag ATAG_CORE? - cmp r5, #ATAG_CORE_SIZE + ldr r5, [r2, #0] +#ifdef CONFIG_OF_FLATTREE + ldr r6, =OF_DT_MAGIC @ is it a DTB? + cmp r5, r6 + beq 2f +#endif + cmp r5, #ATAG_CORE_SIZE @ is first tag ATAG_CORE? cmpne r5, #ATAG_CORE_SIZE_EMPTY bne 1f ldr r5, [r2, #4] @@ -49,7 +61,7 @@ __vet_atags: cmp r5, r6 bne 1f - mov pc, lr @ atag pointer is ok +2: mov pc, lr @ atag/dtb pointer is ok 1: mov r2, #0 mov pc, lr @@ -61,7 +73,7 @@ ENDPROC(__vet_atags) * * r0 = cp#15 control register * r1 = machine ID - * r2 = atags pointer + * r2 = atags/dtb pointer * r9 = processor ID */ __INIT diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index c9173cfbbc74..278c1b0ebb2e 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -59,7 +59,7 @@ * * This is normally called from the decompressor code. The requirements * are: MMU = off, D-cache = off, I-cache = dont care, r0 = 0, - * r1 = machine nr, r2 = atags pointer. + * r1 = machine nr, r2 = atags or dtb pointer. * * This code is mostly position independent, so if you link the kernel at * 0xc0008000, you call this at __pa(0xc0008000). @@ -91,7 +91,7 @@ ENTRY(stext) #endif /* - * r1 = machine no, r2 = atags, + * r1 = machine no, r2 = atags or dtb, * r8 = phys_offset, r9 = cpuid, r10 = procinfo */ bl __vet_atags @@ -113,6 +113,7 @@ ENTRY(stext) ldr r13, =__mmap_switched @ address to jump to after @ mmu has been enabled adr lr, BSYM(1f) @ return (PIC) address + mov r8, r4 @ set TTBR1 to swapper_pg_dir ARM( add pc, r10, #PROCINFO_INITFUNC ) THUMB( add r12, r10, #PROCINFO_INITFUNC ) THUMB( mov pc, r12 ) @@ -302,8 +303,10 @@ ENTRY(secondary_startup) */ adr r4, __secondary_data ldmia r4, {r5, r7, r12} @ address to jump to after - sub r4, r4, r5 @ mmu has been enabled - ldr r4, [r7, r4] @ get secondary_data.pgdir + sub lr, r4, r5 @ mmu has been enabled + ldr r4, [r7, lr] @ get secondary_data.pgdir + add r7, r7, #4 + ldr r8, [r7, lr] @ get secondary_data.swapper_pg_dir adr lr, BSYM(__enable_mmu) @ return address mov r13, r12 @ __secondary_switched address ARM( add pc, r10, #PROCINFO_INITFUNC ) @ initialise processor @@ -339,7 +342,7 @@ __secondary_data: * * r0 = cp#15 control register * r1 = machine ID - * r2 = atags pointer + * r2 = atags or dtb pointer * r4 = page table pointer * r9 = processor ID * r13 = *virtual* address to jump to upon completion @@ -376,7 +379,7 @@ ENDPROC(__enable_mmu) * * r0 = cp#15 control register * r1 = machine ID - * r2 = atags pointer + * r2 = atags or dtb pointer * r9 = processor ID * r13 = *virtual* address to jump to upon completion * diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c index 23891317dc4b..15eeff6aea0e 100644 --- a/arch/arm/kernel/kprobes-decode.c +++ b/arch/arm/kernel/kprobes-decode.c @@ -34,9 +34,6 @@ * * *) If the PC is written to by the instruction, the * instruction must be fully simulated in software. - * If it is a conditional instruction, the handler - * will use insn[0] to copy its condition code to - * set r0 to 1 and insn[1] to "mov pc, lr" to return. * * *) Otherwise, a modified form of the instruction is * directly executed. Its handler calls the @@ -68,13 +65,17 @@ #define branch_displacement(insn) sign_extend(((insn) & 0xffffff) << 2, 25) +#define is_r15(insn, bitpos) (((insn) & (0xf << bitpos)) == (0xf << bitpos)) + +/* + * Test if load/store instructions writeback the address register. + * if P (bit 24) == 0 or W (bit 21) == 1 + */ +#define is_writeback(insn) ((insn ^ 0x01000000) & 0x01200000) + #define PSR_fs (PSR_f|PSR_s) #define KPROBE_RETURN_INSTRUCTION 0xe1a0f00e /* mov pc, lr */ -#define SET_R0_TRUE_INSTRUCTION 0xe3a00001 /* mov r0, #1 */ - -#define truecc_insn(insn) (((insn) & 0xf0000000) | \ - (SET_R0_TRUE_INSTRUCTION & 0x0fffffff)) typedef long (insn_0arg_fn_t)(void); typedef long (insn_1arg_fn_t)(long); @@ -419,14 +420,10 @@ insnslot_llret_4arg_rwflags(long r0, long r1, long r2, long r3, long *cpsr, static void __kprobes simulate_bbl(struct kprobe *p, struct pt_regs *regs) { - insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0]; kprobe_opcode_t insn = p->opcode; long iaddr = (long)p->addr; int disp = branch_displacement(insn); - if (!insnslot_1arg_rflags(0, regs->ARM_cpsr, i_fn)) - return; - if (insn & (1 << 24)) regs->ARM_lr = iaddr + 4; @@ -446,14 +443,10 @@ static void __kprobes simulate_blx1(struct kprobe *p, struct pt_regs *regs) static void __kprobes simulate_blx2bx(struct kprobe *p, struct pt_regs *regs) { - insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0]; kprobe_opcode_t insn = p->opcode; int rm = insn & 0xf; long rmv = regs->uregs[rm]; - if (!insnslot_1arg_rflags(0, regs->ARM_cpsr, i_fn)) - return; - if (insn & (1 << 5)) regs->ARM_lr = (long)p->addr + 4; @@ -463,9 +456,16 @@ static void __kprobes simulate_blx2bx(struct kprobe *p, struct pt_regs *regs) regs->ARM_cpsr |= PSR_T_BIT; } +static void __kprobes simulate_mrs(struct kprobe *p, struct pt_regs *regs) +{ + kprobe_opcode_t insn = p->opcode; + int rd = (insn >> 12) & 0xf; + unsigned long mask = 0xf8ff03df; /* Mask out execution state */ + regs->uregs[rd] = regs->ARM_cpsr & mask; +} + static void __kprobes simulate_ldm1stm1(struct kprobe *p, struct pt_regs *regs) { - insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0]; kprobe_opcode_t insn = p->opcode; int rn = (insn >> 16) & 0xf; int lbit = insn & (1 << 20); @@ -476,9 +476,6 @@ static void __kprobes simulate_ldm1stm1(struct kprobe *p, struct pt_regs *regs) int reg_bit_vector; int reg_count; - if (!insnslot_1arg_rflags(0, regs->ARM_cpsr, i_fn)) - return; - reg_count = 0; reg_bit_vector = insn & 0xffff; while (reg_bit_vector) { @@ -510,11 +507,6 @@ static void __kprobes simulate_ldm1stm1(struct kprobe *p, struct pt_regs *regs) static void __kprobes simulate_stm1_pc(struct kprobe *p, struct pt_regs *regs) { - insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0]; - - if (!insnslot_1arg_rflags(0, regs->ARM_cpsr, i_fn)) - return; - regs->ARM_pc = (long)p->addr + str_pc_offset; simulate_ldm1stm1(p, regs); regs->ARM_pc = (long)p->addr + 4; @@ -525,24 +517,16 @@ static void __kprobes simulate_mov_ipsp(struct kprobe *p, struct pt_regs *regs) regs->uregs[12] = regs->uregs[13]; } -static void __kprobes emulate_ldcstc(struct kprobe *p, struct pt_regs *regs) -{ - insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0]; - kprobe_opcode_t insn = p->opcode; - int rn = (insn >> 16) & 0xf; - long rnv = regs->uregs[rn]; - - /* Save Rn in case of writeback. */ - regs->uregs[rn] = insnslot_1arg_rflags(rnv, regs->ARM_cpsr, i_fn); -} - static void __kprobes emulate_ldrd(struct kprobe *p, struct pt_regs *regs) { insn_2arg_fn_t *i_fn = (insn_2arg_fn_t *)&p->ainsn.insn[0]; kprobe_opcode_t insn = p->opcode; + long ppc = (long)p->addr + 8; int rd = (insn >> 12) & 0xf; int rn = (insn >> 16) & 0xf; int rm = insn & 0xf; /* rm may be invalid, don't care. */ + long rmv = (rm == 15) ? ppc : regs->uregs[rm]; + long rnv = (rn == 15) ? ppc : regs->uregs[rn]; /* Not following the C calling convention here, so need asm(). */ __asm__ __volatile__ ( @@ -554,29 +538,36 @@ static void __kprobes emulate_ldrd(struct kprobe *p, struct pt_regs *regs) "str r0, %[rn] \n\t" /* in case of writeback */ "str r2, %[rd0] \n\t" "str r3, %[rd1] \n\t" - : [rn] "+m" (regs->uregs[rn]), + : [rn] "+m" (rnv), [rd0] "=m" (regs->uregs[rd]), [rd1] "=m" (regs->uregs[rd+1]) - : [rm] "m" (regs->uregs[rm]), + : [rm] "m" (rmv), [cpsr] "r" (regs->ARM_cpsr), [i_fn] "r" (i_fn) : "r0", "r1", "r2", "r3", "lr", "cc" ); + if (is_writeback(insn)) + regs->uregs[rn] = rnv; } static void __kprobes emulate_strd(struct kprobe *p, struct pt_regs *regs) { insn_4arg_fn_t *i_fn = (insn_4arg_fn_t *)&p->ainsn.insn[0]; kprobe_opcode_t insn = p->opcode; + long ppc = (long)p->addr + 8; int rd = (insn >> 12) & 0xf; int rn = (insn >> 16) & 0xf; int rm = insn & 0xf; - long rnv = regs->uregs[rn]; - long rmv = regs->uregs[rm]; /* rm/rmv may be invalid, don't care. */ + long rnv = (rn == 15) ? ppc : regs->uregs[rn]; + /* rm/rmv may be invalid, don't care. */ + long rmv = (rm == 15) ? ppc : regs->uregs[rm]; + long rnv_wb; - regs->uregs[rn] = insnslot_4arg_rflags(rnv, rmv, regs->uregs[rd], + rnv_wb = insnslot_4arg_rflags(rnv, rmv, regs->uregs[rd], regs->uregs[rd+1], regs->ARM_cpsr, i_fn); + if (is_writeback(insn)) + regs->uregs[rn] = rnv_wb; } static void __kprobes emulate_ldr(struct kprobe *p, struct pt_regs *regs) @@ -630,31 +621,6 @@ static void __kprobes emulate_str(struct kprobe *p, struct pt_regs *regs) regs->uregs[rn] = rnv_wb; /* Save Rn in case of writeback. */ } -static void __kprobes emulate_mrrc(struct kprobe *p, struct pt_regs *regs) -{ - insn_llret_0arg_fn_t *i_fn = (insn_llret_0arg_fn_t *)&p->ainsn.insn[0]; - kprobe_opcode_t insn = p->opcode; - union reg_pair fnr; - int rd = (insn >> 12) & 0xf; - int rn = (insn >> 16) & 0xf; - - fnr.dr = insnslot_llret_0arg_rflags(regs->ARM_cpsr, i_fn); - regs->uregs[rn] = fnr.r0; - regs->uregs[rd] = fnr.r1; -} - -static void __kprobes emulate_mcrr(struct kprobe *p, struct pt_regs *regs) -{ - insn_2arg_fn_t *i_fn = (insn_2arg_fn_t *)&p->ainsn.insn[0]; - kprobe_opcode_t insn = p->opcode; - int rd = (insn >> 12) & 0xf; - int rn = (insn >> 16) & 0xf; - long rnv = regs->uregs[rn]; - long rdv = regs->uregs[rd]; - - insnslot_2arg_rflags(rnv, rdv, regs->ARM_cpsr, i_fn); -} - static void __kprobes emulate_sat(struct kprobe *p, struct pt_regs *regs) { insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0]; @@ -688,32 +654,32 @@ static void __kprobes emulate_none(struct kprobe *p, struct pt_regs *regs) insnslot_0arg_rflags(regs->ARM_cpsr, i_fn); } -static void __kprobes emulate_rd12(struct kprobe *p, struct pt_regs *regs) +static void __kprobes emulate_nop(struct kprobe *p, struct pt_regs *regs) { - insn_0arg_fn_t *i_fn = (insn_0arg_fn_t *)&p->ainsn.insn[0]; +} + +static void __kprobes +emulate_rd12_modify(struct kprobe *p, struct pt_regs *regs) +{ + insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0]; kprobe_opcode_t insn = p->opcode; int rd = (insn >> 12) & 0xf; + long rdv = regs->uregs[rd]; - regs->uregs[rd] = insnslot_0arg_rflags(regs->ARM_cpsr, i_fn); + regs->uregs[rd] = insnslot_1arg_rflags(rdv, regs->ARM_cpsr, i_fn); } -static void __kprobes emulate_ird12(struct kprobe *p, struct pt_regs *regs) +static void __kprobes +emulate_rd12rn0_modify(struct kprobe *p, struct pt_regs *regs) { - insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0]; + insn_2arg_fn_t *i_fn = (insn_2arg_fn_t *)&p->ainsn.insn[0]; kprobe_opcode_t insn = p->opcode; - int ird = (insn >> 12) & 0xf; - - insnslot_1arg_rflags(regs->uregs[ird], regs->ARM_cpsr, i_fn); -} - -static void __kprobes emulate_rn16(struct kprobe *p, struct pt_regs *regs) -{ - insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0]; - kprobe_opcode_t insn = p->opcode; - int rn = (insn >> 16) & 0xf; + int rd = (insn >> 12) & 0xf; + int rn = insn & 0xf; + long rdv = regs->uregs[rd]; long rnv = regs->uregs[rn]; - insnslot_1arg_rflags(rnv, regs->ARM_cpsr, i_fn); + regs->uregs[rd] = insnslot_2arg_rflags(rdv, rnv, regs->ARM_cpsr, i_fn); } static void __kprobes emulate_rd12rm0(struct kprobe *p, struct pt_regs *regs) @@ -818,6 +784,17 @@ emulate_alu_imm_rwflags(struct kprobe *p, struct pt_regs *regs) regs->uregs[rd] = insnslot_1arg_rwflags(rnv, ®s->ARM_cpsr, i_fn); } +static void __kprobes +emulate_alu_tests_imm(struct kprobe *p, struct pt_regs *regs) +{ + insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0]; + kprobe_opcode_t insn = p->opcode; + int rn = (insn >> 16) & 0xf; + long rnv = (rn == 15) ? (long)p->addr + 8 : regs->uregs[rn]; + + insnslot_1arg_rwflags(rnv, ®s->ARM_cpsr, i_fn); +} + static void __kprobes emulate_alu_rflags(struct kprobe *p, struct pt_regs *regs) { @@ -854,14 +831,34 @@ emulate_alu_rwflags(struct kprobe *p, struct pt_regs *regs) insnslot_3arg_rwflags(rnv, rmv, rsv, ®s->ARM_cpsr, i_fn); } +static void __kprobes +emulate_alu_tests(struct kprobe *p, struct pt_regs *regs) +{ + insn_3arg_fn_t *i_fn = (insn_3arg_fn_t *)&p->ainsn.insn[0]; + kprobe_opcode_t insn = p->opcode; + long ppc = (long)p->addr + 8; + int rn = (insn >> 16) & 0xf; + int rs = (insn >> 8) & 0xf; /* rs/rsv may be invalid, don't care. */ + int rm = insn & 0xf; + long rnv = (rn == 15) ? ppc : regs->uregs[rn]; + long rmv = (rm == 15) ? ppc : regs->uregs[rm]; + long rsv = regs->uregs[rs]; + + insnslot_3arg_rwflags(rnv, rmv, rsv, ®s->ARM_cpsr, i_fn); +} + static enum kprobe_insn __kprobes prep_emulate_ldr_str(kprobe_opcode_t insn, struct arch_specific_insn *asi) { - int ibit = (insn & (1 << 26)) ? 25 : 22; + int not_imm = (insn & (1 << 26)) ? (insn & (1 << 25)) + : (~insn & (1 << 22)); + + if (is_writeback(insn) && is_r15(insn, 16)) + return INSN_REJECTED; /* Writeback to PC */ insn &= 0xfff00fff; insn |= 0x00001000; /* Rn = r0, Rd = r1 */ - if (insn & (1 << ibit)) { + if (not_imm) { insn &= ~0xf; insn |= 2; /* Rm = r2 */ } @@ -870,28 +867,51 @@ prep_emulate_ldr_str(kprobe_opcode_t insn, struct arch_specific_insn *asi) return INSN_GOOD; } +static enum kprobe_insn __kprobes +prep_emulate_rd12_modify(kprobe_opcode_t insn, struct arch_specific_insn *asi) +{ + if (is_r15(insn, 12)) + return INSN_REJECTED; /* Rd is PC */ + + insn &= 0xffff0fff; /* Rd = r0 */ + asi->insn[0] = insn; + asi->insn_handler = emulate_rd12_modify; + return INSN_GOOD; +} + +static enum kprobe_insn __kprobes +prep_emulate_rd12rn0_modify(kprobe_opcode_t insn, + struct arch_specific_insn *asi) +{ + if (is_r15(insn, 12)) + return INSN_REJECTED; /* Rd is PC */ + + insn &= 0xffff0ff0; /* Rd = r0 */ + insn |= 0x00000001; /* Rn = r1 */ + asi->insn[0] = insn; + asi->insn_handler = emulate_rd12rn0_modify; + return INSN_GOOD; +} + static enum kprobe_insn __kprobes prep_emulate_rd12rm0(kprobe_opcode_t insn, struct arch_specific_insn *asi) { + if (is_r15(insn, 12)) + return INSN_REJECTED; /* Rd is PC */ + insn &= 0xffff0ff0; /* Rd = r0, Rm = r0 */ asi->insn[0] = insn; asi->insn_handler = emulate_rd12rm0; return INSN_GOOD; } -static enum kprobe_insn __kprobes -prep_emulate_rd12(kprobe_opcode_t insn, struct arch_specific_insn *asi) -{ - insn &= 0xffff0fff; /* Rd = r0 */ - asi->insn[0] = insn; - asi->insn_handler = emulate_rd12; - return INSN_GOOD; -} - static enum kprobe_insn __kprobes prep_emulate_rd12rn16rm0_wflags(kprobe_opcode_t insn, struct arch_specific_insn *asi) { + if (is_r15(insn, 12)) + return INSN_REJECTED; /* Rd is PC */ + insn &= 0xfff00ff0; /* Rd = r0, Rn = r0 */ insn |= 0x00000001; /* Rm = r1 */ asi->insn[0] = insn; @@ -903,6 +923,9 @@ static enum kprobe_insn __kprobes prep_emulate_rd16rs8rm0_wflags(kprobe_opcode_t insn, struct arch_specific_insn *asi) { + if (is_r15(insn, 16)) + return INSN_REJECTED; /* Rd is PC */ + insn &= 0xfff0f0f0; /* Rd = r0, Rs = r0 */ insn |= 0x00000001; /* Rm = r1 */ asi->insn[0] = insn; @@ -914,6 +937,9 @@ static enum kprobe_insn __kprobes prep_emulate_rd16rn12rs8rm0_wflags(kprobe_opcode_t insn, struct arch_specific_insn *asi) { + if (is_r15(insn, 16)) + return INSN_REJECTED; /* Rd is PC */ + insn &= 0xfff000f0; /* Rd = r0, Rn = r0 */ insn |= 0x00000102; /* Rs = r1, Rm = r2 */ asi->insn[0] = insn; @@ -925,6 +951,9 @@ static enum kprobe_insn __kprobes prep_emulate_rdhi16rdlo12rs8rm0_wflags(kprobe_opcode_t insn, struct arch_specific_insn *asi) { + if (is_r15(insn, 16) || is_r15(insn, 12)) + return INSN_REJECTED; /* RdHi or RdLo is PC */ + insn &= 0xfff000f0; /* RdHi = r0, RdLo = r1 */ insn |= 0x00001203; /* Rs = r2, Rm = r3 */ asi->insn[0] = insn; @@ -945,20 +974,13 @@ prep_emulate_rdhi16rdlo12rs8rm0_wflags(kprobe_opcode_t insn, static enum kprobe_insn __kprobes space_1111(kprobe_opcode_t insn, struct arch_specific_insn *asi) { - /* CPS mmod == 1 : 1111 0001 0000 xx10 xxxx xxxx xx0x xxxx */ - /* RFE : 1111 100x x0x1 xxxx xxxx 1010 xxxx xxxx */ - /* SRS : 1111 100x x1x0 1101 xxxx 0101 xxxx xxxx */ - if ((insn & 0xfff30020) == 0xf1020000 || - (insn & 0xfe500f00) == 0xf8100a00 || - (insn & 0xfe5f0f00) == 0xf84d0500) - return INSN_REJECTED; - - /* PLD : 1111 01x1 x101 xxxx xxxx xxxx xxxx xxxx : */ - if ((insn & 0xfd700000) == 0xf4500000) { - insn &= 0xfff0ffff; /* Rn = r0 */ - asi->insn[0] = insn; - asi->insn_handler = emulate_rn16; - return INSN_GOOD; + /* memory hint : 1111 0100 x001 xxxx xxxx xxxx xxxx xxxx : */ + /* PLDI : 1111 0100 x101 xxxx xxxx xxxx xxxx xxxx : */ + /* PLDW : 1111 0101 x001 xxxx xxxx xxxx xxxx xxxx : */ + /* PLD : 1111 0101 x101 xxxx xxxx xxxx xxxx xxxx : */ + if ((insn & 0xfe300000) == 0xf4100000) { + asi->insn_handler = emulate_nop; + return INSN_GOOD_NO_SLOT; } /* BLX(1) : 1111 101x xxxx xxxx xxxx xxxx xxxx xxxx : */ @@ -967,41 +989,22 @@ space_1111(kprobe_opcode_t insn, struct arch_specific_insn *asi) return INSN_GOOD_NO_SLOT; } - /* SETEND : 1111 0001 0000 0001 xxxx xxxx 0000 xxxx */ - /* CDP2 : 1111 1110 xxxx xxxx xxxx xxxx xxx0 xxxx */ - if ((insn & 0xffff00f0) == 0xf1010000 || - (insn & 0xff000010) == 0xfe000000) { - asi->insn[0] = insn; - asi->insn_handler = emulate_none; - return INSN_GOOD; - } + /* CPS : 1111 0001 0000 xxx0 xxxx xxxx xx0x xxxx */ + /* SETEND: 1111 0001 0000 0001 xxxx xxxx 0000 xxxx */ + /* SRS : 1111 100x x1x0 xxxx xxxx xxxx xxxx xxxx */ + /* RFE : 1111 100x x0x1 xxxx xxxx xxxx xxxx xxxx */ + + /* Coprocessor instructions... */ /* MCRR2 : 1111 1100 0100 xxxx xxxx xxxx xxxx xxxx : (Rd != Rn) */ /* MRRC2 : 1111 1100 0101 xxxx xxxx xxxx xxxx xxxx : (Rd != Rn) */ - if ((insn & 0xffe00000) == 0xfc400000) { - insn &= 0xfff00fff; /* Rn = r0 */ - insn |= 0x00001000; /* Rd = r1 */ - asi->insn[0] = insn; - asi->insn_handler = - (insn & (1 << 20)) ? emulate_mrrc : emulate_mcrr; - return INSN_GOOD; - } + /* LDC2 : 1111 110x xxx1 xxxx xxxx xxxx xxxx xxxx */ + /* STC2 : 1111 110x xxx0 xxxx xxxx xxxx xxxx xxxx */ + /* CDP2 : 1111 1110 xxxx xxxx xxxx xxxx xxx0 xxxx */ + /* MCR2 : 1111 1110 xxx0 xxxx xxxx xxxx xxx1 xxxx */ + /* MRC2 : 1111 1110 xxx1 xxxx xxxx xxxx xxx1 xxxx */ - /* LDC2 : 1111 110x xxx1 xxxx xxxx xxxx xxxx xxxx */ - /* STC2 : 1111 110x xxx0 xxxx xxxx xxxx xxxx xxxx */ - if ((insn & 0xfe000000) == 0xfc000000) { - insn &= 0xfff0ffff; /* Rn = r0 */ - asi->insn[0] = insn; - asi->insn_handler = emulate_ldcstc; - return INSN_GOOD; - } - - /* MCR2 : 1111 1110 xxx0 xxxx xxxx xxxx xxx1 xxxx */ - /* MRC2 : 1111 1110 xxx1 xxxx xxxx xxxx xxx1 xxxx */ - insn &= 0xffff0fff; /* Rd = r0 */ - asi->insn[0] = insn; - asi->insn_handler = (insn & (1 << 20)) ? emulate_rd12 : emulate_ird12; - return INSN_GOOD; + return INSN_REJECTED; } static enum kprobe_insn __kprobes @@ -1010,19 +1013,18 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi) /* cccc 0001 0xx0 xxxx xxxx xxxx xxxx xxx0 xxxx */ if ((insn & 0x0f900010) == 0x01000000) { - /* BXJ : cccc 0001 0010 xxxx xxxx xxxx 0010 xxxx */ - /* MSR : cccc 0001 0x10 xxxx xxxx xxxx 0000 xxxx */ - if ((insn & 0x0ff000f0) == 0x01200020 || - (insn & 0x0fb000f0) == 0x01200000) - return INSN_REJECTED; - - /* MRS : cccc 0001 0x00 xxxx xxxx xxxx 0000 xxxx */ - if ((insn & 0x0fb00010) == 0x01000000) - return prep_emulate_rd12(insn, asi); + /* MRS cpsr : cccc 0001 0000 xxxx xxxx xxxx 0000 xxxx */ + if ((insn & 0x0ff000f0) == 0x01000000) { + if (is_r15(insn, 12)) + return INSN_REJECTED; /* Rd is PC */ + asi->insn_handler = simulate_mrs; + return INSN_GOOD_NO_SLOT; + } /* SMLALxy : cccc 0001 0100 xxxx xxxx xxxx 1xx0 xxxx */ if ((insn & 0x0ff00090) == 0x01400080) - return prep_emulate_rdhi16rdlo12rs8rm0_wflags(insn, asi); + return prep_emulate_rdhi16rdlo12rs8rm0_wflags(insn, + asi); /* SMULWy : cccc 0001 0010 xxxx xxxx xxxx 1x10 xxxx */ /* SMULxy : cccc 0001 0110 xxxx xxxx xxxx 1xx0 xxxx */ @@ -1031,24 +1033,29 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi) return prep_emulate_rd16rs8rm0_wflags(insn, asi); /* SMLAxy : cccc 0001 0000 xxxx xxxx xxxx 1xx0 xxxx : Q */ - /* SMLAWy : cccc 0001 0010 xxxx xxxx xxxx 0x00 xxxx : Q */ - return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi); + /* SMLAWy : cccc 0001 0010 xxxx xxxx xxxx 1x00 xxxx : Q */ + if ((insn & 0x0ff00090) == 0x01000080 || + (insn & 0x0ff000b0) == 0x01200080) + return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi); + /* BXJ : cccc 0001 0010 xxxx xxxx xxxx 0010 xxxx */ + /* MSR : cccc 0001 0x10 xxxx xxxx xxxx 0000 xxxx */ + /* MRS spsr : cccc 0001 0100 xxxx xxxx xxxx 0000 xxxx */ + + /* Other instruction encodings aren't yet defined */ + return INSN_REJECTED; } /* cccc 0001 0xx0 xxxx xxxx xxxx xxxx 0xx1 xxxx */ else if ((insn & 0x0f900090) == 0x01000010) { - /* BKPT : 1110 0001 0010 xxxx xxxx xxxx 0111 xxxx */ - if ((insn & 0xfff000f0) == 0xe1200070) - return INSN_REJECTED; - /* BLX(2) : cccc 0001 0010 xxxx xxxx xxxx 0011 xxxx */ /* BX : cccc 0001 0010 xxxx xxxx xxxx 0001 xxxx */ if ((insn & 0x0ff000d0) == 0x01200010) { - asi->insn[0] = truecc_insn(insn); + if ((insn & 0x0ff000ff) == 0x0120003f) + return INSN_REJECTED; /* BLX pc */ asi->insn_handler = simulate_blx2bx; - return INSN_GOOD; + return INSN_GOOD_NO_SLOT; } /* CLZ : cccc 0001 0110 xxxx xxxx xxxx 0001 xxxx */ @@ -1059,17 +1066,27 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi) /* QSUB : cccc 0001 0010 xxxx xxxx xxxx 0101 xxxx :Q */ /* QDADD : cccc 0001 0100 xxxx xxxx xxxx 0101 xxxx :Q */ /* QDSUB : cccc 0001 0110 xxxx xxxx xxxx 0101 xxxx :Q */ - return prep_emulate_rd12rn16rm0_wflags(insn, asi); + if ((insn & 0x0f9000f0) == 0x01000050) + return prep_emulate_rd12rn16rm0_wflags(insn, asi); + + /* BKPT : 1110 0001 0010 xxxx xxxx xxxx 0111 xxxx */ + /* SMC : cccc 0001 0110 xxxx xxxx xxxx 0111 xxxx */ + + /* Other instruction encodings aren't yet defined */ + return INSN_REJECTED; } /* cccc 0000 xxxx xxxx xxxx xxxx xxxx 1001 xxxx */ - else if ((insn & 0x0f000090) == 0x00000090) { + else if ((insn & 0x0f0000f0) == 0x00000090) { /* MUL : cccc 0000 0000 xxxx xxxx xxxx 1001 xxxx : */ /* MULS : cccc 0000 0001 xxxx xxxx xxxx 1001 xxxx :cc */ /* MLA : cccc 0000 0010 xxxx xxxx xxxx 1001 xxxx : */ /* MLAS : cccc 0000 0011 xxxx xxxx xxxx 1001 xxxx :cc */ /* UMAAL : cccc 0000 0100 xxxx xxxx xxxx 1001 xxxx : */ + /* undef : cccc 0000 0101 xxxx xxxx xxxx 1001 xxxx : */ + /* MLS : cccc 0000 0110 xxxx xxxx xxxx 1001 xxxx : */ + /* undef : cccc 0000 0111 xxxx xxxx xxxx 1001 xxxx : */ /* UMULL : cccc 0000 1000 xxxx xxxx xxxx 1001 xxxx : */ /* UMULLS : cccc 0000 1001 xxxx xxxx xxxx 1001 xxxx :cc */ /* UMLAL : cccc 0000 1010 xxxx xxxx xxxx 1001 xxxx : */ @@ -1078,13 +1095,15 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi) /* SMULLS : cccc 0000 1101 xxxx xxxx xxxx 1001 xxxx :cc */ /* SMLAL : cccc 0000 1110 xxxx xxxx xxxx 1001 xxxx : */ /* SMLALS : cccc 0000 1111 xxxx xxxx xxxx 1001 xxxx :cc */ - if ((insn & 0x0fe000f0) == 0x00000090) { - return prep_emulate_rd16rs8rm0_wflags(insn, asi); - } else if ((insn & 0x0fe000f0) == 0x00200090) { - return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi); - } else { - return prep_emulate_rdhi16rdlo12rs8rm0_wflags(insn, asi); - } + if ((insn & 0x00d00000) == 0x00500000) + return INSN_REJECTED; + else if ((insn & 0x00e00000) == 0x00000000) + return prep_emulate_rd16rs8rm0_wflags(insn, asi); + else if ((insn & 0x00a00000) == 0x00200000) + return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi); + else + return prep_emulate_rdhi16rdlo12rs8rm0_wflags(insn, + asi); } /* cccc 000x xxxx xxxx xxxx xxxx xxxx 1xx1 xxxx */ @@ -1092,23 +1111,45 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi) /* SWP : cccc 0001 0000 xxxx xxxx xxxx 1001 xxxx */ /* SWPB : cccc 0001 0100 xxxx xxxx xxxx 1001 xxxx */ - /* LDRD : cccc 000x xxx0 xxxx xxxx xxxx 1101 xxxx */ - /* STRD : cccc 000x xxx0 xxxx xxxx xxxx 1111 xxxx */ + /* ??? : cccc 0001 0x01 xxxx xxxx xxxx 1001 xxxx */ + /* ??? : cccc 0001 0x10 xxxx xxxx xxxx 1001 xxxx */ + /* ??? : cccc 0001 0x11 xxxx xxxx xxxx 1001 xxxx */ /* STREX : cccc 0001 1000 xxxx xxxx xxxx 1001 xxxx */ /* LDREX : cccc 0001 1001 xxxx xxxx xxxx 1001 xxxx */ + /* STREXD: cccc 0001 1010 xxxx xxxx xxxx 1001 xxxx */ + /* LDREXD: cccc 0001 1011 xxxx xxxx xxxx 1001 xxxx */ + /* STREXB: cccc 0001 1100 xxxx xxxx xxxx 1001 xxxx */ + /* LDREXB: cccc 0001 1101 xxxx xxxx xxxx 1001 xxxx */ + /* STREXH: cccc 0001 1110 xxxx xxxx xxxx 1001 xxxx */ + /* LDREXH: cccc 0001 1111 xxxx xxxx xxxx 1001 xxxx */ + + /* LDRD : cccc 000x xxx0 xxxx xxxx xxxx 1101 xxxx */ + /* STRD : cccc 000x xxx0 xxxx xxxx xxxx 1111 xxxx */ /* LDRH : cccc 000x xxx1 xxxx xxxx xxxx 1011 xxxx */ /* STRH : cccc 000x xxx0 xxxx xxxx xxxx 1011 xxxx */ /* LDRSB : cccc 000x xxx1 xxxx xxxx xxxx 1101 xxxx */ /* LDRSH : cccc 000x xxx1 xxxx xxxx xxxx 1111 xxxx */ - if ((insn & 0x0fb000f0) == 0x01000090) { - /* SWP/SWPB */ - return prep_emulate_rd12rn16rm0_wflags(insn, asi); + if ((insn & 0x0f0000f0) == 0x01000090) { + if ((insn & 0x0fb000f0) == 0x01000090) { + /* SWP/SWPB */ + return prep_emulate_rd12rn16rm0_wflags(insn, + asi); + } else { + /* STREX/LDREX variants and unallocaed space */ + return INSN_REJECTED; + } + } else if ((insn & 0x0e1000d0) == 0x00000d0) { /* STRD/LDRD */ + if ((insn & 0x0000e000) == 0x0000e000) + return INSN_REJECTED; /* Rd is LR or PC */ + if (is_writeback(insn) && is_r15(insn, 16)) + return INSN_REJECTED; /* Writeback to PC */ + insn &= 0xfff00fff; insn |= 0x00002000; /* Rn = r0, Rd = r2 */ - if (insn & (1 << 22)) { - /* I bit */ + if (!(insn & (1 << 22))) { + /* Register index */ insn &= ~0xf; insn |= 1; /* Rm = r1 */ } @@ -1118,6 +1159,9 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi) return INSN_GOOD; } + /* LDRH/STRH/LDRSB/LDRSH */ + if (is_r15(insn, 12)) + return INSN_REJECTED; /* Rd is PC */ return prep_emulate_ldr_str(insn, asi); } @@ -1125,7 +1169,7 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi) /* * ALU op with S bit and Rd == 15 : - * cccc 000x xxx1 xxxx 1111 xxxx xxxx xxxx + * cccc 000x xxx1 xxxx 1111 xxxx xxxx xxxx */ if ((insn & 0x0e10f000) == 0x0010f000) return INSN_REJECTED; @@ -1154,22 +1198,61 @@ space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi) insn |= 0x00000200; /* Rs = r2 */ } asi->insn[0] = insn; - asi->insn_handler = (insn & (1 << 20)) ? /* S-bit */ + + if ((insn & 0x0f900000) == 0x01100000) { + /* + * TST : cccc 0001 0001 xxxx xxxx xxxx xxxx xxxx + * TEQ : cccc 0001 0011 xxxx xxxx xxxx xxxx xxxx + * CMP : cccc 0001 0101 xxxx xxxx xxxx xxxx xxxx + * CMN : cccc 0001 0111 xxxx xxxx xxxx xxxx xxxx + */ + asi->insn_handler = emulate_alu_tests; + } else { + /* ALU ops which write to Rd */ + asi->insn_handler = (insn & (1 << 20)) ? /* S-bit */ emulate_alu_rwflags : emulate_alu_rflags; + } return INSN_GOOD; } static enum kprobe_insn __kprobes space_cccc_001x(kprobe_opcode_t insn, struct arch_specific_insn *asi) { + /* MOVW : cccc 0011 0000 xxxx xxxx xxxx xxxx xxxx */ + /* MOVT : cccc 0011 0100 xxxx xxxx xxxx xxxx xxxx */ + if ((insn & 0x0fb00000) == 0x03000000) + return prep_emulate_rd12_modify(insn, asi); + + /* hints : cccc 0011 0010 0000 xxxx xxxx xxxx xxxx */ + if ((insn & 0x0fff0000) == 0x03200000) { + unsigned op2 = insn & 0x000000ff; + if (op2 == 0x01 || op2 == 0x04) { + /* YIELD : cccc 0011 0010 0000 xxxx xxxx 0000 0001 */ + /* SEV : cccc 0011 0010 0000 xxxx xxxx 0000 0100 */ + asi->insn[0] = insn; + asi->insn_handler = emulate_none; + return INSN_GOOD; + } else if (op2 <= 0x03) { + /* NOP : cccc 0011 0010 0000 xxxx xxxx 0000 0000 */ + /* WFE : cccc 0011 0010 0000 xxxx xxxx 0000 0010 */ + /* WFI : cccc 0011 0010 0000 xxxx xxxx 0000 0011 */ + /* + * We make WFE and WFI true NOPs to avoid stalls due + * to missing events whilst processing the probe. + */ + asi->insn_handler = emulate_nop; + return INSN_GOOD_NO_SLOT; + } + /* For DBG and unallocated hints it's safest to reject them */ + return INSN_REJECTED; + } + /* * MSR : cccc 0011 0x10 xxxx xxxx xxxx xxxx xxxx - * Undef : cccc 0011 0100 xxxx xxxx xxxx xxxx xxxx * ALU op with S bit and Rd == 15 : * cccc 001x xxx1 xxxx 1111 xxxx xxxx xxxx */ if ((insn & 0x0fb00000) == 0x03200000 || /* MSR */ - (insn & 0x0ff00000) == 0x03400000 || /* Undef */ (insn & 0x0e10f000) == 0x0210f000) /* ALU s-bit, R15 */ return INSN_REJECTED; @@ -1180,10 +1263,22 @@ space_cccc_001x(kprobe_opcode_t insn, struct arch_specific_insn *asi) * *S (bit 20) updates condition codes * ADC/SBC/RSC reads the C flag */ - insn &= 0xffff0fff; /* Rd = r0 */ + insn &= 0xfff00fff; /* Rn = r0 and Rd = r0 */ asi->insn[0] = insn; - asi->insn_handler = (insn & (1 << 20)) ? /* S-bit */ + + if ((insn & 0x0f900000) == 0x03100000) { + /* + * TST : cccc 0011 0001 xxxx xxxx xxxx xxxx xxxx + * TEQ : cccc 0011 0011 xxxx xxxx xxxx xxxx xxxx + * CMP : cccc 0011 0101 xxxx xxxx xxxx xxxx xxxx + * CMN : cccc 0011 0111 xxxx xxxx xxxx xxxx xxxx + */ + asi->insn_handler = emulate_alu_tests_imm; + } else { + /* ALU ops which write to Rd */ + asi->insn_handler = (insn & (1 << 20)) ? /* S-bit */ emulate_alu_imm_rwflags : emulate_alu_imm_rflags; + } return INSN_GOOD; } @@ -1192,6 +1287,8 @@ space_cccc_0110__1(kprobe_opcode_t insn, struct arch_specific_insn *asi) { /* SEL : cccc 0110 1000 xxxx xxxx xxxx 1011 xxxx GE: !!! */ if ((insn & 0x0ff000f0) == 0x068000b0) { + if (is_r15(insn, 12)) + return INSN_REJECTED; /* Rd is PC */ insn &= 0xfff00ff0; /* Rd = r0, Rn = r0 */ insn |= 0x00000001; /* Rm = r1 */ asi->insn[0] = insn; @@ -1205,6 +1302,8 @@ space_cccc_0110__1(kprobe_opcode_t insn, struct arch_specific_insn *asi) /* USAT16 : cccc 0110 1110 xxxx xxxx xxxx 0011 xxxx :Q */ if ((insn & 0x0fa00030) == 0x06a00010 || (insn & 0x0fb000f0) == 0x06a00030) { + if (is_r15(insn, 12)) + return INSN_REJECTED; /* Rd is PC */ insn &= 0xffff0ff0; /* Rd = r0, Rm = r0 */ asi->insn[0] = insn; asi->insn_handler = emulate_sat; @@ -1213,57 +1312,101 @@ space_cccc_0110__1(kprobe_opcode_t insn, struct arch_specific_insn *asi) /* REV : cccc 0110 1011 xxxx xxxx xxxx 0011 xxxx */ /* REV16 : cccc 0110 1011 xxxx xxxx xxxx 1011 xxxx */ + /* RBIT : cccc 0110 1111 xxxx xxxx xxxx 0011 xxxx */ /* REVSH : cccc 0110 1111 xxxx xxxx xxxx 1011 xxxx */ if ((insn & 0x0ff00070) == 0x06b00030 || - (insn & 0x0ff000f0) == 0x06f000b0) + (insn & 0x0ff00070) == 0x06f00030) return prep_emulate_rd12rm0(insn, asi); + /* ??? : cccc 0110 0000 xxxx xxxx xxxx xxx1 xxxx : */ /* SADD16 : cccc 0110 0001 xxxx xxxx xxxx 0001 xxxx :GE */ /* SADDSUBX : cccc 0110 0001 xxxx xxxx xxxx 0011 xxxx :GE */ /* SSUBADDX : cccc 0110 0001 xxxx xxxx xxxx 0101 xxxx :GE */ /* SSUB16 : cccc 0110 0001 xxxx xxxx xxxx 0111 xxxx :GE */ /* SADD8 : cccc 0110 0001 xxxx xxxx xxxx 1001 xxxx :GE */ + /* ??? : cccc 0110 0001 xxxx xxxx xxxx 1011 xxxx : */ + /* ??? : cccc 0110 0001 xxxx xxxx xxxx 1101 xxxx : */ /* SSUB8 : cccc 0110 0001 xxxx xxxx xxxx 1111 xxxx :GE */ /* QADD16 : cccc 0110 0010 xxxx xxxx xxxx 0001 xxxx : */ /* QADDSUBX : cccc 0110 0010 xxxx xxxx xxxx 0011 xxxx : */ /* QSUBADDX : cccc 0110 0010 xxxx xxxx xxxx 0101 xxxx : */ /* QSUB16 : cccc 0110 0010 xxxx xxxx xxxx 0111 xxxx : */ /* QADD8 : cccc 0110 0010 xxxx xxxx xxxx 1001 xxxx : */ + /* ??? : cccc 0110 0010 xxxx xxxx xxxx 1011 xxxx : */ + /* ??? : cccc 0110 0010 xxxx xxxx xxxx 1101 xxxx : */ /* QSUB8 : cccc 0110 0010 xxxx xxxx xxxx 1111 xxxx : */ /* SHADD16 : cccc 0110 0011 xxxx xxxx xxxx 0001 xxxx : */ /* SHADDSUBX : cccc 0110 0011 xxxx xxxx xxxx 0011 xxxx : */ /* SHSUBADDX : cccc 0110 0011 xxxx xxxx xxxx 0101 xxxx : */ /* SHSUB16 : cccc 0110 0011 xxxx xxxx xxxx 0111 xxxx : */ /* SHADD8 : cccc 0110 0011 xxxx xxxx xxxx 1001 xxxx : */ + /* ??? : cccc 0110 0011 xxxx xxxx xxxx 1011 xxxx : */ + /* ??? : cccc 0110 0011 xxxx xxxx xxxx 1101 xxxx : */ /* SHSUB8 : cccc 0110 0011 xxxx xxxx xxxx 1111 xxxx : */ + /* ??? : cccc 0110 0100 xxxx xxxx xxxx xxx1 xxxx : */ /* UADD16 : cccc 0110 0101 xxxx xxxx xxxx 0001 xxxx :GE */ /* UADDSUBX : cccc 0110 0101 xxxx xxxx xxxx 0011 xxxx :GE */ /* USUBADDX : cccc 0110 0101 xxxx xxxx xxxx 0101 xxxx :GE */ /* USUB16 : cccc 0110 0101 xxxx xxxx xxxx 0111 xxxx :GE */ /* UADD8 : cccc 0110 0101 xxxx xxxx xxxx 1001 xxxx :GE */ + /* ??? : cccc 0110 0101 xxxx xxxx xxxx 1011 xxxx : */ + /* ??? : cccc 0110 0101 xxxx xxxx xxxx 1101 xxxx : */ /* USUB8 : cccc 0110 0101 xxxx xxxx xxxx 1111 xxxx :GE */ /* UQADD16 : cccc 0110 0110 xxxx xxxx xxxx 0001 xxxx : */ /* UQADDSUBX : cccc 0110 0110 xxxx xxxx xxxx 0011 xxxx : */ /* UQSUBADDX : cccc 0110 0110 xxxx xxxx xxxx 0101 xxxx : */ /* UQSUB16 : cccc 0110 0110 xxxx xxxx xxxx 0111 xxxx : */ /* UQADD8 : cccc 0110 0110 xxxx xxxx xxxx 1001 xxxx : */ + /* ??? : cccc 0110 0110 xxxx xxxx xxxx 1011 xxxx : */ + /* ??? : cccc 0110 0110 xxxx xxxx xxxx 1101 xxxx : */ /* UQSUB8 : cccc 0110 0110 xxxx xxxx xxxx 1111 xxxx : */ /* UHADD16 : cccc 0110 0111 xxxx xxxx xxxx 0001 xxxx : */ /* UHADDSUBX : cccc 0110 0111 xxxx xxxx xxxx 0011 xxxx : */ /* UHSUBADDX : cccc 0110 0111 xxxx xxxx xxxx 0101 xxxx : */ /* UHSUB16 : cccc 0110 0111 xxxx xxxx xxxx 0111 xxxx : */ /* UHADD8 : cccc 0110 0111 xxxx xxxx xxxx 1001 xxxx : */ + /* ??? : cccc 0110 0111 xxxx xxxx xxxx 1011 xxxx : */ + /* ??? : cccc 0110 0111 xxxx xxxx xxxx 1101 xxxx : */ /* UHSUB8 : cccc 0110 0111 xxxx xxxx xxxx 1111 xxxx : */ + if ((insn & 0x0f800010) == 0x06000010) { + if ((insn & 0x00300000) == 0x00000000 || + (insn & 0x000000e0) == 0x000000a0 || + (insn & 0x000000e0) == 0x000000c0) + return INSN_REJECTED; /* Unallocated space */ + return prep_emulate_rd12rn16rm0_wflags(insn, asi); + } + /* PKHBT : cccc 0110 1000 xxxx xxxx xxxx x001 xxxx : */ /* PKHTB : cccc 0110 1000 xxxx xxxx xxxx x101 xxxx : */ + if ((insn & 0x0ff00030) == 0x06800010) + return prep_emulate_rd12rn16rm0_wflags(insn, asi); + /* SXTAB16 : cccc 0110 1000 xxxx xxxx xxxx 0111 xxxx : */ - /* SXTB : cccc 0110 1010 xxxx xxxx xxxx 0111 xxxx : */ + /* SXTB16 : cccc 0110 1000 1111 xxxx xxxx 0111 xxxx : */ + /* ??? : cccc 0110 1001 xxxx xxxx xxxx 0111 xxxx : */ /* SXTAB : cccc 0110 1010 xxxx xxxx xxxx 0111 xxxx : */ + /* SXTB : cccc 0110 1010 1111 xxxx xxxx 0111 xxxx : */ /* SXTAH : cccc 0110 1011 xxxx xxxx xxxx 0111 xxxx : */ + /* SXTH : cccc 0110 1011 1111 xxxx xxxx 0111 xxxx : */ /* UXTAB16 : cccc 0110 1100 xxxx xxxx xxxx 0111 xxxx : */ + /* UXTB16 : cccc 0110 1100 1111 xxxx xxxx 0111 xxxx : */ + /* ??? : cccc 0110 1101 xxxx xxxx xxxx 0111 xxxx : */ /* UXTAB : cccc 0110 1110 xxxx xxxx xxxx 0111 xxxx : */ + /* UXTB : cccc 0110 1110 1111 xxxx xxxx 0111 xxxx : */ /* UXTAH : cccc 0110 1111 xxxx xxxx xxxx 0111 xxxx : */ - return prep_emulate_rd12rn16rm0_wflags(insn, asi); + /* UXTH : cccc 0110 1111 1111 xxxx xxxx 0111 xxxx : */ + if ((insn & 0x0f8000f0) == 0x06800070) { + if ((insn & 0x00300000) == 0x00100000) + return INSN_REJECTED; /* Unallocated space */ + + if ((insn & 0x000f0000) == 0x000f0000) + return prep_emulate_rd12rm0(insn, asi); + else + return prep_emulate_rd12rn16rm0_wflags(insn, asi); + } + + /* Other instruction encodings aren't yet defined */ + return INSN_REJECTED; } static enum kprobe_insn __kprobes @@ -1273,29 +1416,49 @@ space_cccc_0111__1(kprobe_opcode_t insn, struct arch_specific_insn *asi) if ((insn & 0x0ff000f0) == 0x03f000f0) return INSN_REJECTED; - /* USADA8 : cccc 0111 1000 xxxx xxxx xxxx 0001 xxxx */ - /* USAD8 : cccc 0111 1000 xxxx 1111 xxxx 0001 xxxx */ - if ((insn & 0x0ff000f0) == 0x07800010) - return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi); - /* SMLALD : cccc 0111 0100 xxxx xxxx xxxx 00x1 xxxx */ /* SMLSLD : cccc 0111 0100 xxxx xxxx xxxx 01x1 xxxx */ if ((insn & 0x0ff00090) == 0x07400010) return prep_emulate_rdhi16rdlo12rs8rm0_wflags(insn, asi); /* SMLAD : cccc 0111 0000 xxxx xxxx xxxx 00x1 xxxx :Q */ + /* SMUAD : cccc 0111 0000 xxxx 1111 xxxx 00x1 xxxx :Q */ /* SMLSD : cccc 0111 0000 xxxx xxxx xxxx 01x1 xxxx :Q */ + /* SMUSD : cccc 0111 0000 xxxx 1111 xxxx 01x1 xxxx : */ /* SMMLA : cccc 0111 0101 xxxx xxxx xxxx 00x1 xxxx : */ - /* SMMLS : cccc 0111 0101 xxxx xxxx xxxx 11x1 xxxx : */ + /* SMMUL : cccc 0111 0101 xxxx 1111 xxxx 00x1 xxxx : */ + /* USADA8 : cccc 0111 1000 xxxx xxxx xxxx 0001 xxxx : */ + /* USAD8 : cccc 0111 1000 xxxx 1111 xxxx 0001 xxxx : */ if ((insn & 0x0ff00090) == 0x07000010 || (insn & 0x0ff000d0) == 0x07500010 || - (insn & 0x0ff000d0) == 0x075000d0) + (insn & 0x0ff000f0) == 0x07800010) { + + if ((insn & 0x0000f000) == 0x0000f000) + return prep_emulate_rd16rs8rm0_wflags(insn, asi); + else + return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi); + } + + /* SMMLS : cccc 0111 0101 xxxx xxxx xxxx 11x1 xxxx : */ + if ((insn & 0x0ff000d0) == 0x075000d0) return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi); - /* SMUSD : cccc 0111 0000 xxxx xxxx xxxx 01x1 xxxx : */ - /* SMUAD : cccc 0111 0000 xxxx 1111 xxxx 00x1 xxxx :Q */ - /* SMMUL : cccc 0111 0101 xxxx 1111 xxxx 00x1 xxxx : */ - return prep_emulate_rd16rs8rm0_wflags(insn, asi); + /* SBFX : cccc 0111 101x xxxx xxxx xxxx x101 xxxx : */ + /* UBFX : cccc 0111 111x xxxx xxxx xxxx x101 xxxx : */ + if ((insn & 0x0fa00070) == 0x07a00050) + return prep_emulate_rd12rm0(insn, asi); + + /* BFI : cccc 0111 110x xxxx xxxx xxxx x001 xxxx : */ + /* BFC : cccc 0111 110x xxxx xxxx xxxx x001 1111 : */ + if ((insn & 0x0fe00070) == 0x07c00010) { + + if ((insn & 0x0000000f) == 0x0000000f) + return prep_emulate_rd12_modify(insn, asi); + else + return prep_emulate_rd12rn0_modify(insn, asi); + } + + return INSN_REJECTED; } static enum kprobe_insn __kprobes @@ -1309,6 +1472,10 @@ space_cccc_01xx(kprobe_opcode_t insn, struct arch_specific_insn *asi) /* STRB : cccc 01xx x1x0 xxxx xxxx xxxx xxxx xxxx */ /* STRBT : cccc 01x0 x110 xxxx xxxx xxxx xxxx xxxx */ /* STRT : cccc 01x0 x010 xxxx xxxx xxxx xxxx xxxx */ + + if ((insn & 0x00500000) == 0x00500000 && is_r15(insn, 12)) + return INSN_REJECTED; /* LDRB into PC */ + return prep_emulate_ldr_str(insn, asi); } @@ -1323,10 +1490,9 @@ space_cccc_100x(kprobe_opcode_t insn, struct arch_specific_insn *asi) /* LDM(1) : cccc 100x x0x1 xxxx xxxx xxxx xxxx xxxx */ /* STM(1) : cccc 100x x0x0 xxxx xxxx xxxx xxxx xxxx */ - asi->insn[0] = truecc_insn(insn); asi->insn_handler = ((insn & 0x108000) == 0x008000) ? /* STM & R15 */ simulate_stm1_pc : simulate_ldm1stm1; - return INSN_GOOD; + return INSN_GOOD_NO_SLOT; } static enum kprobe_insn __kprobes @@ -1334,58 +1500,117 @@ space_cccc_101x(kprobe_opcode_t insn, struct arch_specific_insn *asi) { /* B : cccc 1010 xxxx xxxx xxxx xxxx xxxx xxxx */ /* BL : cccc 1011 xxxx xxxx xxxx xxxx xxxx xxxx */ - asi->insn[0] = truecc_insn(insn); asi->insn_handler = simulate_bbl; - return INSN_GOOD; + return INSN_GOOD_NO_SLOT; } static enum kprobe_insn __kprobes -space_cccc_1100_010x(kprobe_opcode_t insn, struct arch_specific_insn *asi) +space_cccc_11xx(kprobe_opcode_t insn, struct arch_specific_insn *asi) { + /* Coprocessor instructions... */ /* MCRR : cccc 1100 0100 xxxx xxxx xxxx xxxx xxxx : (Rd!=Rn) */ /* MRRC : cccc 1100 0101 xxxx xxxx xxxx xxxx xxxx : (Rd!=Rn) */ - insn &= 0xfff00fff; - insn |= 0x00001000; /* Rn = r0, Rd = r1 */ - asi->insn[0] = insn; - asi->insn_handler = (insn & (1 << 20)) ? emulate_mrrc : emulate_mcrr; - return INSN_GOOD; + /* LDC : cccc 110x xxx1 xxxx xxxx xxxx xxxx xxxx */ + /* STC : cccc 110x xxx0 xxxx xxxx xxxx xxxx xxxx */ + /* CDP : cccc 1110 xxxx xxxx xxxx xxxx xxx0 xxxx */ + /* MCR : cccc 1110 xxx0 xxxx xxxx xxxx xxx1 xxxx */ + /* MRC : cccc 1110 xxx1 xxxx xxxx xxxx xxx1 xxxx */ + + /* SVC : cccc 1111 xxxx xxxx xxxx xxxx xxxx xxxx */ + + return INSN_REJECTED; } -static enum kprobe_insn __kprobes -space_cccc_110x(kprobe_opcode_t insn, struct arch_specific_insn *asi) +static unsigned long __kprobes __check_eq(unsigned long cpsr) { - /* LDC : cccc 110x xxx1 xxxx xxxx xxxx xxxx xxxx */ - /* STC : cccc 110x xxx0 xxxx xxxx xxxx xxxx xxxx */ - insn &= 0xfff0ffff; /* Rn = r0 */ - asi->insn[0] = insn; - asi->insn_handler = emulate_ldcstc; - return INSN_GOOD; + return cpsr & PSR_Z_BIT; } -static enum kprobe_insn __kprobes -space_cccc_111x(kprobe_opcode_t insn, struct arch_specific_insn *asi) +static unsigned long __kprobes __check_ne(unsigned long cpsr) { - /* BKPT : 1110 0001 0010 xxxx xxxx xxxx 0111 xxxx */ - /* SWI : cccc 1111 xxxx xxxx xxxx xxxx xxxx xxxx */ - if ((insn & 0xfff000f0) == 0xe1200070 || - (insn & 0x0f000000) == 0x0f000000) - return INSN_REJECTED; - - /* CDP : cccc 1110 xxxx xxxx xxxx xxxx xxx0 xxxx */ - if ((insn & 0x0f000010) == 0x0e000000) { - asi->insn[0] = insn; - asi->insn_handler = emulate_none; - return INSN_GOOD; - } - - /* MCR : cccc 1110 xxx0 xxxx xxxx xxxx xxx1 xxxx */ - /* MRC : cccc 1110 xxx1 xxxx xxxx xxxx xxx1 xxxx */ - insn &= 0xffff0fff; /* Rd = r0 */ - asi->insn[0] = insn; - asi->insn_handler = (insn & (1 << 20)) ? emulate_rd12 : emulate_ird12; - return INSN_GOOD; + return (~cpsr) & PSR_Z_BIT; } +static unsigned long __kprobes __check_cs(unsigned long cpsr) +{ + return cpsr & PSR_C_BIT; +} + +static unsigned long __kprobes __check_cc(unsigned long cpsr) +{ + return (~cpsr) & PSR_C_BIT; +} + +static unsigned long __kprobes __check_mi(unsigned long cpsr) +{ + return cpsr & PSR_N_BIT; +} + +static unsigned long __kprobes __check_pl(unsigned long cpsr) +{ + return (~cpsr) & PSR_N_BIT; +} + +static unsigned long __kprobes __check_vs(unsigned long cpsr) +{ + return cpsr & PSR_V_BIT; +} + +static unsigned long __kprobes __check_vc(unsigned long cpsr) +{ + return (~cpsr) & PSR_V_BIT; +} + +static unsigned long __kprobes __check_hi(unsigned long cpsr) +{ + cpsr &= ~(cpsr >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */ + return cpsr & PSR_C_BIT; +} + +static unsigned long __kprobes __check_ls(unsigned long cpsr) +{ + cpsr &= ~(cpsr >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */ + return (~cpsr) & PSR_C_BIT; +} + +static unsigned long __kprobes __check_ge(unsigned long cpsr) +{ + cpsr ^= (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */ + return (~cpsr) & PSR_N_BIT; +} + +static unsigned long __kprobes __check_lt(unsigned long cpsr) +{ + cpsr ^= (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */ + return cpsr & PSR_N_BIT; +} + +static unsigned long __kprobes __check_gt(unsigned long cpsr) +{ + unsigned long temp = cpsr ^ (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */ + temp |= (cpsr << 1); /* PSR_N_BIT |= PSR_Z_BIT */ + return (~temp) & PSR_N_BIT; +} + +static unsigned long __kprobes __check_le(unsigned long cpsr) +{ + unsigned long temp = cpsr ^ (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */ + temp |= (cpsr << 1); /* PSR_N_BIT |= PSR_Z_BIT */ + return temp & PSR_N_BIT; +} + +static unsigned long __kprobes __check_al(unsigned long cpsr) +{ + return true; +} + +static kprobe_check_cc * const condition_checks[16] = { + &__check_eq, &__check_ne, &__check_cs, &__check_cc, + &__check_mi, &__check_pl, &__check_vs, &__check_vc, + &__check_hi, &__check_ls, &__check_ge, &__check_lt, + &__check_gt, &__check_le, &__check_al, &__check_al +}; + /* Return: * INSN_REJECTED If instruction is one not allowed to kprobe, * INSN_GOOD If instruction is supported and uses instruction slot, @@ -1401,133 +1626,45 @@ space_cccc_111x(kprobe_opcode_t insn, struct arch_specific_insn *asi) enum kprobe_insn __kprobes arm_kprobe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi) { + asi->insn_check_cc = condition_checks[insn>>28]; asi->insn[1] = KPROBE_RETURN_INSTRUCTION; - if ((insn & 0xf0000000) == 0xf0000000) { + if ((insn & 0xf0000000) == 0xf0000000) return space_1111(insn, asi); - } else if ((insn & 0x0e000000) == 0x00000000) { + else if ((insn & 0x0e000000) == 0x00000000) return space_cccc_000x(insn, asi); - } else if ((insn & 0x0e000000) == 0x02000000) { + else if ((insn & 0x0e000000) == 0x02000000) return space_cccc_001x(insn, asi); - } else if ((insn & 0x0f000010) == 0x06000010) { + else if ((insn & 0x0f000010) == 0x06000010) return space_cccc_0110__1(insn, asi); - } else if ((insn & 0x0f000010) == 0x07000010) { + else if ((insn & 0x0f000010) == 0x07000010) return space_cccc_0111__1(insn, asi); - } else if ((insn & 0x0c000000) == 0x04000000) { + else if ((insn & 0x0c000000) == 0x04000000) return space_cccc_01xx(insn, asi); - } else if ((insn & 0x0e000000) == 0x08000000) { + else if ((insn & 0x0e000000) == 0x08000000) return space_cccc_100x(insn, asi); - } else if ((insn & 0x0e000000) == 0x0a000000) { + else if ((insn & 0x0e000000) == 0x0a000000) return space_cccc_101x(insn, asi); - } else if ((insn & 0x0fe00000) == 0x0c400000) { - - return space_cccc_1100_010x(insn, asi); - - } else if ((insn & 0x0e000000) == 0x0c000000) { - - return space_cccc_110x(insn, asi); - - } - - return space_cccc_111x(insn, asi); + return space_cccc_11xx(insn, asi); } void __init arm_kprobe_decode_init(void) { find_str_pc_offset(); } - - -/* - * All ARM instructions listed below. - * - * Instructions and their general purpose registers are given. - * If a particular register may not use R15, it is prefixed with a "!". - * If marked with a "*" means the value returned by reading R15 - * is implementation defined. - * - * ADC/ADD/AND/BIC/CMN/CMP/EOR/MOV/MVN/ORR/RSB/RSC/SBC/SUB/TEQ - * TST: Rd, Rn, Rm, !Rs - * BX: Rm - * BLX(2): !Rm - * BX: Rm (R15 legal, but discouraged) - * BXJ: !Rm, - * CLZ: !Rd, !Rm - * CPY: Rd, Rm - * LDC/2,STC/2 immediate offset & unindex: Rn - * LDC/2,STC/2 immediate pre/post-indexed: !Rn - * LDM(1/3): !Rn, register_list - * LDM(2): !Rn, !register_list - * LDR,STR,PLD immediate offset: Rd, Rn - * LDR,STR,PLD register offset: Rd, Rn, !Rm - * LDR,STR,PLD scaled register offset: Rd, !Rn, !Rm - * LDR,STR immediate pre/post-indexed: Rd, !Rn - * LDR,STR register pre/post-indexed: Rd, !Rn, !Rm - * LDR,STR scaled register pre/post-indexed: Rd, !Rn, !Rm - * LDRB,STRB immediate offset: !Rd, Rn - * LDRB,STRB register offset: !Rd, Rn, !Rm - * LDRB,STRB scaled register offset: !Rd, !Rn, !Rm - * LDRB,STRB immediate pre/post-indexed: !Rd, !Rn - * LDRB,STRB register pre/post-indexed: !Rd, !Rn, !Rm - * LDRB,STRB scaled register pre/post-indexed: !Rd, !Rn, !Rm - * LDRT,LDRBT,STRBT immediate pre/post-indexed: !Rd, !Rn - * LDRT,LDRBT,STRBT register pre/post-indexed: !Rd, !Rn, !Rm - * LDRT,LDRBT,STRBT scaled register pre/post-indexed: !Rd, !Rn, !Rm - * LDRH/SH/SB/D,STRH/SH/SB/D immediate offset: !Rd, Rn - * LDRH/SH/SB/D,STRH/SH/SB/D register offset: !Rd, Rn, !Rm - * LDRH/SH/SB/D,STRH/SH/SB/D immediate pre/post-indexed: !Rd, !Rn - * LDRH/SH/SB/D,STRH/SH/SB/D register pre/post-indexed: !Rd, !Rn, !Rm - * LDREX: !Rd, !Rn - * MCR/2: !Rd - * MCRR/2,MRRC/2: !Rd, !Rn - * MLA: !Rd, !Rn, !Rm, !Rs - * MOV: Rd - * MRC/2: !Rd (if Rd==15, only changes cond codes, not the register) - * MRS,MSR: !Rd - * MUL: !Rd, !Rm, !Rs - * PKH{BT,TB}: !Rd, !Rn, !Rm - * QDADD,[U]QADD/16/8/SUBX: !Rd, !Rm, !Rn - * QDSUB,[U]QSUB/16/8/ADDX: !Rd, !Rm, !Rn - * REV/16/SH: !Rd, !Rm - * RFE: !Rn - * {S,U}[H]ADD{16,8,SUBX},{S,U}[H]SUB{16,8,ADDX}: !Rd, !Rn, !Rm - * SEL: !Rd, !Rn, !Rm - * SMLA,SMLA{D,W},SMLSD,SMML{A,S}: !Rd, !Rn, !Rm, !Rs - * SMLAL,SMLA{D,LD},SMLSLD,SMMULL,SMULW: !RdHi, !RdLo, !Rm, !Rs - * SMMUL,SMUAD,SMUL,SMUSD: !Rd, !Rm, !Rs - * SSAT/16: !Rd, !Rm - * STM(1/2): !Rn, register_list* (R15 in reg list not recommended) - * STRT immediate pre/post-indexed: Rd*, !Rn - * STRT register pre/post-indexed: Rd*, !Rn, !Rm - * STRT scaled register pre/post-indexed: Rd*, !Rn, !Rm - * STREX: !Rd, !Rn, !Rm - * SWP/B: !Rd, !Rn, !Rm - * {S,U}XTA{B,B16,H}: !Rd, !Rn, !Rm - * {S,U}XT{B,B16,H}: !Rd, !Rm - * UM{AA,LA,UL}L: !RdHi, !RdLo, !Rm, !Rs - * USA{D8,A8,T,T16}: !Rd, !Rm, !Rs - * - * May transfer control by writing R15 (possible mode changes or alternate - * mode accesses marked by "*"): - * ALU op (* with s-bit), B, BL, BKPT, BLX(1/2), BX, BXJ, CPS*, CPY, - * LDM(1), LDM(2/3)*, LDR, MOV, RFE*, SWI* - * - * Instructions that do not take general registers, nor transfer control: - * CDP/2, SETEND, SRS* - */ diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/kernel/kprobes.c index 2ba7deb3072e..1656c87501c0 100644 --- a/arch/arm/kernel/kprobes.c +++ b/arch/arm/kernel/kprobes.c @@ -134,7 +134,8 @@ static void __kprobes singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { regs->ARM_pc += 4; - p->ainsn.insn_handler(p, regs); + if (p->ainsn.insn_check_cc(regs->ARM_cpsr)) + p->ainsn.insn_handler(p, regs); } /* diff --git a/arch/arm/kernel/leds.c b/arch/arm/kernel/leds.c index 31a316c1777b..0f107dcb0347 100644 --- a/arch/arm/kernel/leds.c +++ b/arch/arm/kernel/leds.c @@ -10,6 +10,7 @@ #include #include #include +#include #include @@ -69,29 +70,8 @@ static ssize_t leds_store(struct sys_device *dev, static SYSDEV_ATTR(event, 0200, NULL, leds_store); -static int leds_suspend(struct sys_device *dev, pm_message_t state) -{ - leds_event(led_stop); - return 0; -} - -static int leds_resume(struct sys_device *dev) -{ - leds_event(led_start); - return 0; -} - -static int leds_shutdown(struct sys_device *dev) -{ - leds_event(led_halted); - return 0; -} - static struct sysdev_class leds_sysclass = { .name = "leds", - .shutdown = leds_shutdown, - .suspend = leds_suspend, - .resume = leds_resume, }; static struct sys_device leds_device = { @@ -99,6 +79,28 @@ static struct sys_device leds_device = { .cls = &leds_sysclass, }; +static int leds_suspend(void) +{ + leds_event(led_stop); + return 0; +} + +static void leds_resume(void) +{ + leds_event(led_start); +} + +static void leds_shutdown(void) +{ + leds_event(led_halted); +} + +static struct syscore_ops leds_syscore_ops = { + .shutdown = leds_shutdown, + .suspend = leds_suspend, + .resume = leds_resume, +}; + static int __init leds_init(void) { int ret; @@ -107,6 +109,8 @@ static int __init leds_init(void) ret = sysdev_register(&leds_device); if (ret == 0) ret = sysdev_create_file(&leds_device, &attr_event); + if (ret == 0) + register_syscore_ops(&leds_syscore_ops); return ret; } diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 979da3947f42..d53c0abc4dd3 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -560,11 +560,6 @@ static int armpmu_event_init(struct perf_event *event) event->destroy = hw_perf_event_destroy; if (!atomic_inc_not_zero(&active_events)) { - if (atomic_read(&active_events) > armpmu->num_events) { - atomic_dec(&active_events); - return -ENOSPC; - } - mutex_lock(&pmu_reserve_mutex); if (atomic_read(&active_events) == 0) { err = armpmu_reserve_hardware(); @@ -746,7 +741,8 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) tail = (struct frame_tail __user *)regs->ARM_fp - 1; - while (tail && !((unsigned long)tail & 0x3)) + while ((entry->nr < PERF_MAX_STACK_DEPTH) && + tail && !((unsigned long)tail & 0x3)) tail = user_backtrace(tail, entry); } diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 2bf27f364d09..97260060bf26 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -308,58 +309,6 @@ static int ptrace_write_user(struct task_struct *tsk, unsigned long off, return put_user_reg(tsk, off >> 2, val); } -/* - * Get all user integer registers. - */ -static int ptrace_getregs(struct task_struct *tsk, void __user *uregs) -{ - struct pt_regs *regs = task_pt_regs(tsk); - - return copy_to_user(uregs, regs, sizeof(struct pt_regs)) ? -EFAULT : 0; -} - -/* - * Set all user integer registers. - */ -static int ptrace_setregs(struct task_struct *tsk, void __user *uregs) -{ - struct pt_regs newregs; - int ret; - - ret = -EFAULT; - if (copy_from_user(&newregs, uregs, sizeof(struct pt_regs)) == 0) { - struct pt_regs *regs = task_pt_regs(tsk); - - ret = -EINVAL; - if (valid_user_regs(&newregs)) { - *regs = newregs; - ret = 0; - } - } - - return ret; -} - -/* - * Get the child FPU state. - */ -static int ptrace_getfpregs(struct task_struct *tsk, void __user *ufp) -{ - return copy_to_user(ufp, &task_thread_info(tsk)->fpstate, - sizeof(struct user_fp)) ? -EFAULT : 0; -} - -/* - * Set the child FPU state. - */ -static int ptrace_setfpregs(struct task_struct *tsk, void __user *ufp) -{ - struct thread_info *thread = task_thread_info(tsk); - thread->used_cp[1] = thread->used_cp[2] = 1; - return copy_from_user(&thread->fpstate, ufp, - sizeof(struct user_fp)) ? -EFAULT : 0; -} - #ifdef CONFIG_IWMMXT /* @@ -418,56 +367,6 @@ static int ptrace_setcrunchregs(struct task_struct *tsk, void __user *ufp) } #endif -#ifdef CONFIG_VFP -/* - * Get the child VFP state. - */ -static int ptrace_getvfpregs(struct task_struct *tsk, void __user *data) -{ - struct thread_info *thread = task_thread_info(tsk); - union vfp_state *vfp = &thread->vfpstate; - struct user_vfp __user *ufp = data; - - vfp_sync_hwstate(thread); - - /* copy the floating point registers */ - if (copy_to_user(&ufp->fpregs, &vfp->hard.fpregs, - sizeof(vfp->hard.fpregs))) - return -EFAULT; - - /* copy the status and control register */ - if (put_user(vfp->hard.fpscr, &ufp->fpscr)) - return -EFAULT; - - return 0; -} - -/* - * Set the child VFP state. - */ -static int ptrace_setvfpregs(struct task_struct *tsk, void __user *data) -{ - struct thread_info *thread = task_thread_info(tsk); - union vfp_state *vfp = &thread->vfpstate; - struct user_vfp __user *ufp = data; - - vfp_sync_hwstate(thread); - - /* copy the floating point registers */ - if (copy_from_user(&vfp->hard.fpregs, &ufp->fpregs, - sizeof(vfp->hard.fpregs))) - return -EFAULT; - - /* copy the status and control register */ - if (get_user(vfp->hard.fpscr, &ufp->fpscr)) - return -EFAULT; - - vfp_flush_hwstate(thread); - - return 0; -} -#endif - #ifdef CONFIG_HAVE_HW_BREAKPOINT /* * Convert a virtual register number into an index for a thread_info @@ -694,6 +593,219 @@ out: } #endif +/* regset get/set implementations */ + +static int gpr_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct pt_regs *regs = task_pt_regs(target); + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + regs, + 0, sizeof(*regs)); +} + +static int gpr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + struct pt_regs newregs; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &newregs, + 0, sizeof(newregs)); + if (ret) + return ret; + + if (!valid_user_regs(&newregs)) + return -EINVAL; + + *task_pt_regs(target) = newregs; + return 0; +} + +static int fpa_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &task_thread_info(target)->fpstate, + 0, sizeof(struct user_fp)); +} + +static int fpa_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct thread_info *thread = task_thread_info(target); + + thread->used_cp[1] = thread->used_cp[2] = 1; + + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &thread->fpstate, + 0, sizeof(struct user_fp)); +} + +#ifdef CONFIG_VFP +/* + * VFP register get/set implementations. + * + * With respect to the kernel, struct user_fp is divided into three chunks: + * 16 or 32 real VFP registers (d0-d15 or d0-31) + * These are transferred to/from the real registers in the task's + * vfp_hard_struct. The number of registers depends on the kernel + * configuration. + * + * 16 or 0 fake VFP registers (d16-d31 or empty) + * i.e., the user_vfp structure has space for 32 registers even if + * the kernel doesn't have them all. + * + * vfp_get() reads this chunk as zero where applicable + * vfp_set() ignores this chunk + * + * 1 word for the FPSCR + * + * The bounds-checking logic built into user_regset_copyout and friends + * means that we can make a simple sequence of calls to map the relevant data + * to/from the specified slice of the user regset structure. + */ +static int vfp_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int ret; + struct thread_info *thread = task_thread_info(target); + struct vfp_hard_struct const *vfp = &thread->vfpstate.hard; + const size_t user_fpregs_offset = offsetof(struct user_vfp, fpregs); + const size_t user_fpscr_offset = offsetof(struct user_vfp, fpscr); + + vfp_sync_hwstate(thread); + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &vfp->fpregs, + user_fpregs_offset, + user_fpregs_offset + sizeof(vfp->fpregs)); + if (ret) + return ret; + + ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + user_fpregs_offset + sizeof(vfp->fpregs), + user_fpscr_offset); + if (ret) + return ret; + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &vfp->fpscr, + user_fpscr_offset, + user_fpscr_offset + sizeof(vfp->fpscr)); +} + +/* + * For vfp_set() a read-modify-write is done on the VFP registers, + * in order to avoid writing back a half-modified set of registers on + * failure. + */ +static int vfp_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + struct thread_info *thread = task_thread_info(target); + struct vfp_hard_struct new_vfp = thread->vfpstate.hard; + const size_t user_fpregs_offset = offsetof(struct user_vfp, fpregs); + const size_t user_fpscr_offset = offsetof(struct user_vfp, fpscr); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &new_vfp.fpregs, + user_fpregs_offset, + user_fpregs_offset + sizeof(new_vfp.fpregs)); + if (ret) + return ret; + + ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + user_fpregs_offset + sizeof(new_vfp.fpregs), + user_fpscr_offset); + if (ret) + return ret; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &new_vfp.fpscr, + user_fpscr_offset, + user_fpscr_offset + sizeof(new_vfp.fpscr)); + if (ret) + return ret; + + vfp_sync_hwstate(thread); + thread->vfpstate.hard = new_vfp; + vfp_flush_hwstate(thread); + + return 0; +} +#endif /* CONFIG_VFP */ + +enum arm_regset { + REGSET_GPR, + REGSET_FPR, +#ifdef CONFIG_VFP + REGSET_VFP, +#endif +}; + +static const struct user_regset arm_regsets[] = { + [REGSET_GPR] = { + .core_note_type = NT_PRSTATUS, + .n = ELF_NGREG, + .size = sizeof(u32), + .align = sizeof(u32), + .get = gpr_get, + .set = gpr_set + }, + [REGSET_FPR] = { + /* + * For the FPA regs in fpstate, the real fields are a mixture + * of sizes, so pretend that the registers are word-sized: + */ + .core_note_type = NT_PRFPREG, + .n = sizeof(struct user_fp) / sizeof(u32), + .size = sizeof(u32), + .align = sizeof(u32), + .get = fpa_get, + .set = fpa_set + }, +#ifdef CONFIG_VFP + [REGSET_VFP] = { + /* + * Pretend that the VFP regs are word-sized, since the FPSCR is + * a single word dangling at the end of struct user_vfp: + */ + .core_note_type = NT_ARM_VFP, + .n = ARM_VFPREGS_SIZE / sizeof(u32), + .size = sizeof(u32), + .align = sizeof(u32), + .get = vfp_get, + .set = vfp_set + }, +#endif /* CONFIG_VFP */ +}; + +static const struct user_regset_view user_arm_view = { + .name = "arm", .e_machine = ELF_ARCH, .ei_osabi = ELF_OSABI, + .regsets = arm_regsets, .n = ARRAY_SIZE(arm_regsets) +}; + +const struct user_regset_view *task_user_regset_view(struct task_struct *task) +{ + return &user_arm_view; +} + long arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { @@ -710,19 +822,31 @@ long arch_ptrace(struct task_struct *child, long request, break; case PTRACE_GETREGS: - ret = ptrace_getregs(child, datap); + ret = copy_regset_to_user(child, + &user_arm_view, REGSET_GPR, + 0, sizeof(struct pt_regs), + datap); break; case PTRACE_SETREGS: - ret = ptrace_setregs(child, datap); + ret = copy_regset_from_user(child, + &user_arm_view, REGSET_GPR, + 0, sizeof(struct pt_regs), + datap); break; case PTRACE_GETFPREGS: - ret = ptrace_getfpregs(child, datap); + ret = copy_regset_to_user(child, + &user_arm_view, REGSET_FPR, + 0, sizeof(union fp_state), + datap); break; - + case PTRACE_SETFPREGS: - ret = ptrace_setfpregs(child, datap); + ret = copy_regset_from_user(child, + &user_arm_view, REGSET_FPR, + 0, sizeof(union fp_state), + datap); break; #ifdef CONFIG_IWMMXT @@ -757,22 +881,36 @@ long arch_ptrace(struct task_struct *child, long request, #ifdef CONFIG_VFP case PTRACE_GETVFPREGS: - ret = ptrace_getvfpregs(child, datap); + ret = copy_regset_to_user(child, + &user_arm_view, REGSET_VFP, + 0, ARM_VFPREGS_SIZE, + datap); break; case PTRACE_SETVFPREGS: - ret = ptrace_setvfpregs(child, datap); + ret = copy_regset_from_user(child, + &user_arm_view, REGSET_VFP, + 0, ARM_VFPREGS_SIZE, + datap); break; #endif #ifdef CONFIG_HAVE_HW_BREAKPOINT case PTRACE_GETHBPREGS: + if (ptrace_get_breakpoints(child) < 0) + return -ESRCH; + ret = ptrace_gethbpregs(child, addr, (unsigned long __user *)data); + ptrace_put_breakpoints(child); break; case PTRACE_SETHBPREGS: + if (ptrace_get_breakpoints(child) < 0) + return -ESRCH; + ret = ptrace_sethbpregs(child, addr, (unsigned long __user *)data); + ptrace_put_breakpoints(child); break; #endif diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 006c1e884eaf..ed11fb08b05a 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -42,6 +43,7 @@ #include #include +#include #include #include #include @@ -309,7 +311,7 @@ static void __init cacheid_init(void) */ extern struct proc_info_list *lookup_processor_type(unsigned int); -static void __init early_print(const char *str, ...) +void __init early_print(const char *str, ...) { extern void printascii(const char *); char buf[256]; @@ -439,25 +441,12 @@ void cpu_init(void) : "r14"); } -static struct machine_desc * __init setup_machine(unsigned int nr) +void __init dump_machine_table(void) { - extern struct machine_desc __arch_info_begin[], __arch_info_end[]; struct machine_desc *p; - /* - * locate machine in the list of supported machines. - */ - for (p = __arch_info_begin; p < __arch_info_end; p++) - if (nr == p->nr) { - printk("Machine: %s\n", p->name); - return p; - } - - early_print("\n" - "Error: unrecognized/unsupported machine ID (r1 = 0x%08x).\n\n" - "Available machine support:\n\nID (hex)\tNAME\n", nr); - - for (p = __arch_info_begin; p < __arch_info_end; p++) + early_print("Available machine support:\n\nID (hex)\tNAME\n"); + for_each_machine_desc(p) early_print("%08x\t%s\n", p->nr, p->name); early_print("\nPlease check your kernel config and/or bootloader.\n"); @@ -466,7 +455,7 @@ static struct machine_desc * __init setup_machine(unsigned int nr) /* can't use cpu_relax() here as it may require MMU setup */; } -static int __init arm_add_memory(phys_addr_t start, unsigned long size) +int __init arm_add_memory(phys_addr_t start, unsigned long size) { struct membank *bank = &meminfo.bank[meminfo.nr_banks]; @@ -672,11 +661,16 @@ __tagtable(ATAG_REVISION, parse_tag_revision); static int __init parse_tag_cmdline(const struct tag *tag) { -#ifndef CONFIG_CMDLINE_FORCE - strlcpy(default_command_line, tag->u.cmdline.cmdline, COMMAND_LINE_SIZE); -#else +#if defined(CONFIG_CMDLINE_EXTEND) + strlcat(default_command_line, " ", COMMAND_LINE_SIZE); + strlcat(default_command_line, tag->u.cmdline.cmdline, + COMMAND_LINE_SIZE); +#elif defined(CONFIG_CMDLINE_FORCE) pr_warning("Ignoring tag cmdline (using the default kernel command line)\n"); -#endif /* CONFIG_CMDLINE_FORCE */ +#else + strlcpy(default_command_line, tag->u.cmdline.cmdline, + COMMAND_LINE_SIZE); +#endif return 0; } @@ -796,23 +790,29 @@ static void __init squash_mem_tags(struct tag *tag) tag->hdr.tag = ATAG_NONE; } -void __init setup_arch(char **cmdline_p) +static struct machine_desc * __init setup_machine_tags(unsigned int nr) { struct tag *tags = (struct tag *)&init_tags; - struct machine_desc *mdesc; + struct machine_desc *mdesc = NULL, *p; char *from = default_command_line; init_tags.mem.start = PHYS_OFFSET; - unwind_init(); + /* + * locate machine in the list of supported machines. + */ + for_each_machine_desc(p) + if (nr == p->nr) { + printk("Machine: %s\n", p->name); + mdesc = p; + break; + } - setup_processor(); - mdesc = setup_machine(machine_arch_type); - machine_desc = mdesc; - machine_name = mdesc->name; - - if (mdesc->soft_reboot) - reboot_setup("s"); + if (!mdesc) { + early_print("\nError: unrecognized/unsupported machine ID" + " (r1 = 0x%08x).\n\n", nr); + dump_machine_table(); /* does not return */ + } if (__atags_pointer) tags = phys_to_virt(__atags_pointer); @@ -844,8 +844,17 @@ void __init setup_arch(char **cmdline_p) if (tags->hdr.tag != ATAG_CORE) convert_to_tag_list(tags); #endif - if (tags->hdr.tag != ATAG_CORE) + + if (tags->hdr.tag != ATAG_CORE) { +#if defined(CONFIG_OF) + /* + * If CONFIG_OF is set, then assume this is a reasonably + * modern system that should pass boot parameters + */ + early_print("Warning: Neither atags nor dtb found\n"); +#endif tags = (struct tag *)&init_tags; + } if (mdesc->fixup) mdesc->fixup(mdesc, tags, &from, &meminfo); @@ -857,14 +866,34 @@ void __init setup_arch(char **cmdline_p) parse_tags(tags); } + /* parse_early_param needs a boot_command_line */ + strlcpy(boot_command_line, from, COMMAND_LINE_SIZE); + + return mdesc; +} + + +void __init setup_arch(char **cmdline_p) +{ + struct machine_desc *mdesc; + + unwind_init(); + + setup_processor(); + mdesc = setup_machine_fdt(__atags_pointer); + if (!mdesc) + mdesc = setup_machine_tags(machine_arch_type); + machine_desc = mdesc; + machine_name = mdesc->name; + + if (mdesc->soft_reboot) + reboot_setup("s"); + init_mm.start_code = (unsigned long) _text; init_mm.end_code = (unsigned long) _etext; init_mm.end_data = (unsigned long) _edata; init_mm.brk = (unsigned long) _end; - /* parse_early_param needs a boot_command_line */ - strlcpy(boot_command_line, from, COMMAND_LINE_SIZE); - /* populate cmd_line too for later use, preserving boot_command_line */ strlcpy(cmd_line, boot_command_line, COMMAND_LINE_SIZE); *cmdline_p = cmd_line; @@ -876,6 +905,8 @@ void __init setup_arch(char **cmdline_p) paging_init(mdesc); request_standard_resources(mdesc); + unflatten_device_tree(); + #ifdef CONFIG_SMP if (is_smp()) smp_init_cpus(); diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index cb8398317644..0340224cf73c 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -597,45 +597,19 @@ setup_rt_frame(int usig, struct k_sigaction *ka, siginfo_t *info, return err; } -static inline void setup_syscall_restart(struct pt_regs *regs) -{ - regs->ARM_r0 = regs->ARM_ORIG_r0; - regs->ARM_pc -= thumb_mode(regs) ? 2 : 4; -} - /* * OK, we're invoking a handler */ static int handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset, - struct pt_regs * regs, int syscall) + struct pt_regs * regs) { struct thread_info *thread = current_thread_info(); struct task_struct *tsk = current; int usig = sig; int ret; - /* - * If we were from a system call, check for system call restarting... - */ - if (syscall) { - switch (regs->ARM_r0) { - case -ERESTART_RESTARTBLOCK: - case -ERESTARTNOHAND: - regs->ARM_r0 = -EINTR; - break; - case -ERESTARTSYS: - if (!(ka->sa.sa_flags & SA_RESTART)) { - regs->ARM_r0 = -EINTR; - break; - } - /* fallthrough */ - case -ERESTARTNOINTR: - setup_syscall_restart(regs); - } - } - /* * translate the signal */ @@ -685,6 +659,7 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, */ static void do_signal(struct pt_regs *regs, int syscall) { + unsigned int retval = 0, continue_addr = 0, restart_addr = 0; struct k_sigaction ka; siginfo_t info; int signr; @@ -698,18 +673,61 @@ static void do_signal(struct pt_regs *regs, int syscall) if (!user_mode(regs)) return; + /* + * If we were from a system call, check for system call restarting... + */ + if (syscall) { + continue_addr = regs->ARM_pc; + restart_addr = continue_addr - (thumb_mode(regs) ? 2 : 4); + retval = regs->ARM_r0; + + /* + * Prepare for system call restart. We do this here so that a + * debugger will see the already changed PSW. + */ + switch (retval) { + case -ERESTARTNOHAND: + case -ERESTARTSYS: + case -ERESTARTNOINTR: + regs->ARM_r0 = regs->ARM_ORIG_r0; + regs->ARM_pc = restart_addr; + break; + case -ERESTART_RESTARTBLOCK: + regs->ARM_r0 = -EINTR; + break; + } + } + if (try_to_freeze()) goto no_signal; + /* + * Get the signal to deliver. When running under ptrace, at this + * point the debugger may change all our registers ... + */ signr = get_signal_to_deliver(&info, &ka, regs, NULL); if (signr > 0) { sigset_t *oldset; + /* + * Depending on the signal settings we may need to revert the + * decision to restart the system call. But skip this if a + * debugger has chosen to restart at a different PC. + */ + if (regs->ARM_pc == restart_addr) { + if (retval == -ERESTARTNOHAND + || (retval == -ERESTARTSYS + && !(ka.sa.sa_flags & SA_RESTART))) { + regs->ARM_r0 = -EINTR; + regs->ARM_pc = continue_addr; + } + } + if (test_thread_flag(TIF_RESTORE_SIGMASK)) oldset = ¤t->saved_sigmask; else oldset = ¤t->blocked; - if (handle_signal(signr, &ka, &info, oldset, regs, syscall) == 0) { + if (handle_signal(signr, &ka, &info, oldset, regs) == 0) { /* * A signal was successfully delivered; the saved * sigmask will have been stored in the signal frame, @@ -723,11 +741,14 @@ static void do_signal(struct pt_regs *regs, int syscall) } no_signal: - /* - * No signal to deliver to the process - restart the syscall. - */ if (syscall) { - if (regs->ARM_r0 == -ERESTART_RESTARTBLOCK) { + /* + * Handle restarting a different system call. As above, + * if a debugger has chosen to restart at a different PC, + * ignore the restart. + */ + if (retval == -ERESTART_RESTARTBLOCK + && regs->ARM_pc == continue_addr) { if (thumb_mode(regs)) { regs->ARM_r7 = __NR_restart_syscall - __NR_SYSCALL_BASE; regs->ARM_pc -= 2; @@ -750,11 +771,6 @@ static void do_signal(struct pt_regs *regs, int syscall) #endif } } - if (regs->ARM_r0 == -ERESTARTNOHAND || - regs->ARM_r0 == -ERESTARTSYS || - regs->ARM_r0 == -ERESTARTNOINTR) { - setup_syscall_restart(regs); - } /* If there's no signal to deliver, we just put the saved sigmask * back. diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 8fe05ad932e4..344e52b16c8c 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -105,6 +105,7 @@ int __cpuinit __cpu_up(unsigned int cpu) */ secondary_data.stack = task_stack_page(idle) + THREAD_START_SP; secondary_data.pgdir = virt_to_phys(pgd); + secondary_data.swapper_pg_dir = virt_to_phys(swapper_pg_dir); __cpuc_flush_dcache_area(&secondary_data, sizeof(secondary_data)); outer_clean_range(__pa(&secondary_data), __pa(&secondary_data + 1)); @@ -376,6 +377,13 @@ void __init smp_prepare_cpus(unsigned int max_cpus) } } +static void (*smp_cross_call)(const struct cpumask *, unsigned int); + +void __init set_smp_cross_call(void (*fn)(const struct cpumask *, unsigned int)) +{ + smp_cross_call = fn; +} + void arch_send_call_function_ipi_mask(const struct cpumask *mask) { smp_cross_call(mask, IPI_CALL_FUNC); @@ -479,7 +487,7 @@ static void broadcast_timer_set_mode(enum clock_event_mode mode, { } -static void broadcast_timer_setup(struct clock_event_device *evt) +static void __cpuinit broadcast_timer_setup(struct clock_event_device *evt) { evt->name = "dummy_timer"; evt->features = CLOCK_EVT_FEAT_ONESHOT | @@ -560,10 +568,7 @@ asmlinkage void __exception_irq_entry do_IPI(int ipinr, struct pt_regs *regs) break; case IPI_RESCHEDULE: - /* - * nothing more to do - eveything is - * done on the interrupt return path - */ + scheduler_ipi(); break; case IPI_CALL_FUNC: diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c index 4ad8da15ef2b..af0aaebf4de6 100644 --- a/arch/arm/kernel/sys_oabi-compat.c +++ b/arch/arm/kernel/sys_oabi-compat.c @@ -311,7 +311,7 @@ asmlinkage long sys_oabi_semtimedop(int semid, long err; int i; - if (nsops < 1) + if (nsops < 1 || nsops > SEMOPM) return -EINVAL; sops = kmalloc(sizeof(*sops) * nsops, GFP_KERNEL); if (!sops) diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index 1ff46cabc7ef..cb634c3e28e9 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include @@ -115,48 +115,37 @@ void timer_tick(void) #endif #if defined(CONFIG_PM) && !defined(CONFIG_GENERIC_CLOCKEVENTS) -static int timer_suspend(struct sys_device *dev, pm_message_t state) +static int timer_suspend(void) { - struct sys_timer *timer = container_of(dev, struct sys_timer, dev); - - if (timer->suspend != NULL) - timer->suspend(); + if (system_timer->suspend) + system_timer->suspend(); return 0; } -static int timer_resume(struct sys_device *dev) +static void timer_resume(void) { - struct sys_timer *timer = container_of(dev, struct sys_timer, dev); - - if (timer->resume != NULL) - timer->resume(); - - return 0; + if (system_timer->resume) + system_timer->resume(); } #else #define timer_suspend NULL #define timer_resume NULL #endif -static struct sysdev_class timer_sysclass = { - .name = "timer", +static struct syscore_ops timer_syscore_ops = { .suspend = timer_suspend, .resume = timer_resume, }; -static int __init timer_init_sysfs(void) +static int __init timer_init_syscore_ops(void) { - int ret = sysdev_class_register(&timer_sysclass); - if (ret == 0) { - system_timer->dev.cls = &timer_sysclass; - ret = sysdev_register(&system_timer->dev); - } + register_syscore_ops(&timer_syscore_ops); - return ret; + return 0; } -device_initcall(timer_init_sysfs); +device_initcall(timer_init_syscore_ops); void __init time_init(void) { diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 3b54ad19d489..6807cb1e76dd 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -139,7 +139,7 @@ static void dump_instr(const char *lvl, struct pt_regs *regs) fs = get_fs(); set_fs(KERNEL_DS); - for (i = -4; i < 1; i++) { + for (i = -4; i < 1 + !!thumb; i++) { unsigned int val, bad; if (thumb) @@ -234,7 +234,6 @@ static int __die(const char *str, int err, struct thread_info *thread, struct pt printk(KERN_EMERG "Internal error: %s: %x [#%d]" S_PREEMPT S_SMP "\n", str, err, ++die_counter); - sysfs_printk_last_file(); /* trap and error numbers are mostly meaningless on ARM */ ret = notify_die(DIE_OOPS, str, regs, err, tsk->thread.trap_no, SIGSEGV); @@ -564,7 +563,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs) if (!pmd_present(*pmd)) goto bad_access; pte = pte_offset_map_lock(mm, pmd, addr, &ptl); - if (!pte_present(*pte) || !pte_dirty(*pte)) { + if (!pte_present(*pte) || !pte_write(*pte) || !pte_dirty(*pte)) { pte_unmap_unlock(pte, ptl); goto bad_access; } diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index b4348e62ef06..e5287f21badc 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -82,7 +82,7 @@ SECTIONS #endif } - PERCPU(32, PAGE_SIZE) + PERCPU_SECTION(32) #ifndef CONFIG_XIP_KERNEL . = ALIGN(PAGE_SIZE); diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S index 6dc06487f3c3..c562f649734c 100644 --- a/arch/arm/lib/lib1funcs.S +++ b/arch/arm/lib/lib1funcs.S @@ -35,7 +35,7 @@ Boston, MA 02111-1307, USA. */ #include #include - +#include .macro ARM_DIV_BODY dividend, divisor, result, curbit @@ -207,6 +207,7 @@ Boston, MA 02111-1307, USA. */ ENTRY(__udivsi3) ENTRY(__aeabi_uidiv) +UNWIND(.fnstart) subs r2, r1, #1 moveq pc, lr @@ -230,10 +231,12 @@ ENTRY(__aeabi_uidiv) mov r0, r0, lsr r2 mov pc, lr +UNWIND(.fnend) ENDPROC(__udivsi3) ENDPROC(__aeabi_uidiv) ENTRY(__umodsi3) +UNWIND(.fnstart) subs r2, r1, #1 @ compare divisor with 1 bcc Ldiv0 @@ -247,10 +250,12 @@ ENTRY(__umodsi3) mov pc, lr +UNWIND(.fnend) ENDPROC(__umodsi3) ENTRY(__divsi3) ENTRY(__aeabi_idiv) +UNWIND(.fnstart) cmp r1, #0 eor ip, r0, r1 @ save the sign of the result. @@ -287,10 +292,12 @@ ENTRY(__aeabi_idiv) rsbmi r0, r0, #0 mov pc, lr +UNWIND(.fnend) ENDPROC(__divsi3) ENDPROC(__aeabi_idiv) ENTRY(__modsi3) +UNWIND(.fnstart) cmp r1, #0 beq Ldiv0 @@ -310,11 +317,14 @@ ENTRY(__modsi3) rsbmi r0, r0, #0 mov pc, lr +UNWIND(.fnend) ENDPROC(__modsi3) #ifdef CONFIG_AEABI ENTRY(__aeabi_uidivmod) +UNWIND(.fnstart) +UNWIND(.save {r0, r1, ip, lr} ) stmfd sp!, {r0, r1, ip, lr} bl __aeabi_uidiv @@ -323,10 +333,12 @@ ENTRY(__aeabi_uidivmod) sub r1, r1, r3 mov pc, lr +UNWIND(.fnend) ENDPROC(__aeabi_uidivmod) ENTRY(__aeabi_idivmod) - +UNWIND(.fnstart) +UNWIND(.save {r0, r1, ip, lr} ) stmfd sp!, {r0, r1, ip, lr} bl __aeabi_idiv ldmfd sp!, {r1, r2, ip, lr} @@ -334,15 +346,18 @@ ENTRY(__aeabi_idivmod) sub r1, r1, r3 mov pc, lr +UNWIND(.fnend) ENDPROC(__aeabi_idivmod) #endif Ldiv0: - +UNWIND(.fnstart) +UNWIND(.pad #4) +UNWIND(.save {lr}) str lr, [sp, #-8]! bl __div0 mov r0, #0 @ About as wrong as it could be. ldr pc, [sp], #8 - - +UNWIND(.fnend) +ENDPROC(Ldiv0) diff --git a/arch/arm/mach-at91/Kconfig b/arch/arm/mach-at91/Kconfig index 19390231a0e9..22484670e7ba 100644 --- a/arch/arm/mach-at91/Kconfig +++ b/arch/arm/mach-at91/Kconfig @@ -3,9 +3,6 @@ if ARCH_AT91 config HAVE_AT91_DATAFLASH_CARD bool -config HAVE_NAND_ATMEL_BUSWIDTH_16 - bool - config HAVE_AT91_USART3 bool @@ -83,11 +80,7 @@ config ARCH_AT91CAP9 select CPU_ARM926T select GENERIC_CLOCKEVENTS select HAVE_FB_ATMEL - -config ARCH_AT572D940HF - bool "AT572D940HF" - select CPU_ARM926T - select GENERIC_CLOCKEVENTS + select HAVE_NET_MACB config ARCH_AT91X40 bool "AT91x40" @@ -208,7 +201,6 @@ comment "AT91SAM9260 / AT91SAM9XE Board Type" config MACH_AT91SAM9260EK bool "Atmel AT91SAM9260-EK / AT91SAM9XE Evaluation Kit" select HAVE_AT91_DATAFLASH_CARD - select HAVE_NAND_ATMEL_BUSWIDTH_16 help Select this if you are using Atmel's AT91SAM9260-EK or AT91SAM9XE Evaluation Kit @@ -269,7 +261,6 @@ comment "AT91SAM9261 Board Type" config MACH_AT91SAM9261EK bool "Atmel AT91SAM9261-EK Evaluation Kit" select HAVE_AT91_DATAFLASH_CARD - select HAVE_NAND_ATMEL_BUSWIDTH_16 help Select this if you are using Atmel's AT91SAM9261-EK Evaluation Kit. @@ -285,7 +276,6 @@ comment "AT91SAM9G10 Board Type" config MACH_AT91SAM9G10EK bool "Atmel AT91SAM9G10-EK Evaluation Kit" select HAVE_AT91_DATAFLASH_CARD - select HAVE_NAND_ATMEL_BUSWIDTH_16 help Select this if you are using Atmel's AT91SAM9G10-EK Evaluation Kit. @@ -301,7 +291,6 @@ comment "AT91SAM9263 Board Type" config MACH_AT91SAM9263EK bool "Atmel AT91SAM9263-EK Evaluation Kit" select HAVE_AT91_DATAFLASH_CARD - select HAVE_NAND_ATMEL_BUSWIDTH_16 help Select this if you are using Atmel's AT91SAM9263-EK Evaluation Kit. @@ -342,7 +331,6 @@ comment "AT91SAM9G20 Board Type" config MACH_AT91SAM9G20EK bool "Atmel AT91SAM9G20-EK Evaluation Kit" select HAVE_AT91_DATAFLASH_CARD - select HAVE_NAND_ATMEL_BUSWIDTH_16 help Select this if you are using Atmel's AT91SAM9G20-EK Evaluation Kit that embeds only one SD/MMC slot. @@ -350,7 +338,6 @@ config MACH_AT91SAM9G20EK config MACH_AT91SAM9G20EK_2MMC depends on MACH_AT91SAM9G20EK bool "Atmel AT91SAM9G20-EK Evaluation Kit with 2 SD/MMC Slots" - select HAVE_NAND_ATMEL_BUSWIDTH_16 help Select this if you are using an Atmel AT91SAM9G20-EK Evaluation Kit with 2 SD/MMC Slots. This is the case for AT91SAM9G20-EK rev. C and @@ -415,7 +402,6 @@ comment "AT91SAM9G45 Board Type" config MACH_AT91SAM9M10G45EK bool "Atmel AT91SAM9M10G45-EK Evaluation Kits" - select HAVE_NAND_ATMEL_BUSWIDTH_16 help Select this if you are using Atmel's AT91SAM9G45-EKES Evaluation Kit. "ES" at the end of the name means that this board is an @@ -432,7 +418,6 @@ comment "AT91CAP9 Board Type" config MACH_AT91CAP9ADK bool "Atmel AT91CAP9A-DK Evaluation Kit" select HAVE_AT91_DATAFLASH_CARD - select HAVE_NAND_ATMEL_BUSWIDTH_16 help Select this if you are using Atmel's AT91CAP9A-DK Evaluation Kit. @@ -441,23 +426,6 @@ endif # ---------------------------------------------------------- -if ARCH_AT572D940HF - -comment "AT572D940HF Board Type" - -config MACH_AT572D940HFEB - bool "AT572D940HF-EK" - depends on ARCH_AT572D940HF - select HAVE_AT91_DATAFLASH_CARD - select HAVE_NAND_ATMEL_BUSWIDTH_16 - help - Select this if you are using Atmel's AT572D940HF-EK evaluation kit. - - -endif - -# ---------------------------------------------------------- - if ARCH_AT91X40 comment "AT91X40 Board Type" @@ -482,13 +450,6 @@ config MTD_AT91_DATAFLASH_CARD help Enable support for the DataFlash card. -config MTD_NAND_ATMEL_BUSWIDTH_16 - bool "Enable 16-bit data bus interface to NAND flash" - depends on HAVE_NAND_ATMEL_BUSWIDTH_16 - help - On AT91SAM926x boards both types of NAND flash can be present - (8 and 16 bit data bus width). - # ---------------------------------------------------------- comment "AT91 Feature Selections" diff --git a/arch/arm/mach-at91/Makefile b/arch/arm/mach-at91/Makefile index a83835e0c185..96966231920c 100644 --- a/arch/arm/mach-at91/Makefile +++ b/arch/arm/mach-at91/Makefile @@ -19,7 +19,6 @@ obj-$(CONFIG_ARCH_AT91SAM9RL) += at91sam9rl.o at91sam926x_time.o at91sam9rl_devi obj-$(CONFIG_ARCH_AT91SAM9G20) += at91sam9260.o at91sam926x_time.o at91sam9260_devices.o sam9_smc.o at91sam9_alt_reset.o obj-$(CONFIG_ARCH_AT91SAM9G45) += at91sam9g45.o at91sam926x_time.o at91sam9g45_devices.o sam9_smc.o obj-$(CONFIG_ARCH_AT91CAP9) += at91cap9.o at91sam926x_time.o at91cap9_devices.o sam9_smc.o -obj-$(CONFIG_ARCH_AT572D940HF) += at572d940hf.o at91sam926x_time.o at572d940hf_devices.o sam9_smc.o obj-$(CONFIG_ARCH_AT91X40) += at91x40.o at91x40_time.o # AT91RM9200 board-specific support @@ -78,9 +77,6 @@ obj-$(CONFIG_MACH_AT91SAM9M10G45EK) += board-sam9m10g45ek.o # AT91CAP9 board-specific support obj-$(CONFIG_MACH_AT91CAP9ADK) += board-cap9adk.o -# AT572D940HF board-specific support -obj-$(CONFIG_MACH_AT572D940HFEB) += board-at572d940hf_ek.o - # AT91X40 board-specific support obj-$(CONFIG_MACH_AT91EB01) += board-eb01.o diff --git a/arch/arm/mach-at91/at572d940hf.c b/arch/arm/mach-at91/at572d940hf.c deleted file mode 100644 index a6b9c68c003a..000000000000 --- a/arch/arm/mach-at91/at572d940hf.c +++ /dev/null @@ -1,377 +0,0 @@ -/* - * arch/arm/mach-at91/at572d940hf.c - * - * Antonio R. Costa - * Copyright (C) 2008 Atmel - * - * Copyright (C) 2005 SAN People - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#include - -#include -#include -#include -#include -#include -#include - -#include "generic.h" -#include "clock.h" - -static struct map_desc at572d940hf_io_desc[] __initdata = { - { - .virtual = AT91_VA_BASE_SYS, - .pfn = __phys_to_pfn(AT91_BASE_SYS), - .length = SZ_16K, - .type = MT_DEVICE, - }, { - .virtual = AT91_IO_VIRT_BASE - AT572D940HF_SRAM_SIZE, - .pfn = __phys_to_pfn(AT572D940HF_SRAM_BASE), - .length = AT572D940HF_SRAM_SIZE, - .type = MT_DEVICE, - }, -}; - -/* -------------------------------------------------------------------- - * Clocks - * -------------------------------------------------------------------- */ - -/* - * The peripheral clocks. - */ -static struct clk pioA_clk = { - .name = "pioA_clk", - .pmc_mask = 1 << AT572D940HF_ID_PIOA, - .type = CLK_TYPE_PERIPHERAL, -}; -static struct clk pioB_clk = { - .name = "pioB_clk", - .pmc_mask = 1 << AT572D940HF_ID_PIOB, - .type = CLK_TYPE_PERIPHERAL, -}; -static struct clk pioC_clk = { - .name = "pioC_clk", - .pmc_mask = 1 << AT572D940HF_ID_PIOC, - .type = CLK_TYPE_PERIPHERAL, -}; -static struct clk macb_clk = { - .name = "macb_clk", - .pmc_mask = 1 << AT572D940HF_ID_EMAC, - .type = CLK_TYPE_PERIPHERAL, -}; -static struct clk usart0_clk = { - .name = "usart0_clk", - .pmc_mask = 1 << AT572D940HF_ID_US0, - .type = CLK_TYPE_PERIPHERAL, -}; -static struct clk usart1_clk = { - .name = "usart1_clk", - .pmc_mask = 1 << AT572D940HF_ID_US1, - .type = CLK_TYPE_PERIPHERAL, -}; -static struct clk usart2_clk = { - .name = "usart2_clk", - .pmc_mask = 1 << AT572D940HF_ID_US2, - .type = CLK_TYPE_PERIPHERAL, -}; -static struct clk mmc_clk = { - .name = "mci_clk", - .pmc_mask = 1 << AT572D940HF_ID_MCI, - .type = CLK_TYPE_PERIPHERAL, -}; -static struct clk udc_clk = { - .name = "udc_clk", - .pmc_mask = 1 << AT572D940HF_ID_UDP, - .type = CLK_TYPE_PERIPHERAL, -}; -static struct clk twi0_clk = { - .name = "twi0_clk", - .pmc_mask = 1 << AT572D940HF_ID_TWI0, - .type = CLK_TYPE_PERIPHERAL, -}; -static struct clk spi0_clk = { - .name = "spi0_clk", - .pmc_mask = 1 << AT572D940HF_ID_SPI0, - .type = CLK_TYPE_PERIPHERAL, -}; -static struct clk spi1_clk = { - .name = "spi1_clk", - .pmc_mask = 1 << AT572D940HF_ID_SPI1, - .type = CLK_TYPE_PERIPHERAL, -}; -static struct clk ssc0_clk = { - .name = "ssc0_clk", - .pmc_mask = 1 << AT572D940HF_ID_SSC0, - .type = CLK_TYPE_PERIPHERAL, -}; -static struct clk ssc1_clk = { - .name = "ssc1_clk", - .pmc_mask = 1 << AT572D940HF_ID_SSC1, - .type = CLK_TYPE_PERIPHERAL, -}; -static struct clk ssc2_clk = { - .name = "ssc2_clk", - .pmc_mask = 1 << AT572D940HF_ID_SSC2, - .type = CLK_TYPE_PERIPHERAL, -}; -static struct clk tc0_clk = { - .name = "tc0_clk", - .pmc_mask = 1 << AT572D940HF_ID_TC0, - .type = CLK_TYPE_PERIPHERAL, -}; -static struct clk tc1_clk = { - .name = "tc1_clk", - .pmc_mask = 1 << AT572D940HF_ID_TC1, - .type = CLK_TYPE_PERIPHERAL, -}; -static struct clk tc2_clk = { - .name = "tc2_clk", - .pmc_mask = 1 << AT572D940HF_ID_TC2, - .type = CLK_TYPE_PERIPHERAL, -}; -static struct clk ohci_clk = { - .name = "ohci_clk", - .pmc_mask = 1 << AT572D940HF_ID_UHP, - .type = CLK_TYPE_PERIPHERAL, -}; -static struct clk ssc3_clk = { - .name = "ssc3_clk", - .pmc_mask = 1 << AT572D940HF_ID_SSC3, - .type = CLK_TYPE_PERIPHERAL, -}; -static struct clk twi1_clk = { - .name = "twi1_clk", - .pmc_mask = 1 << AT572D940HF_ID_TWI1, - .type = CLK_TYPE_PERIPHERAL, -}; -static struct clk can0_clk = { - .name = "can0_clk", - .pmc_mask = 1 << AT572D940HF_ID_CAN0, - .type = CLK_TYPE_PERIPHERAL, -}; -static struct clk can1_clk = { - .name = "can1_clk", - .pmc_mask = 1 << AT572D940HF_ID_CAN1, - .type = CLK_TYPE_PERIPHERAL, -}; -static struct clk mAgicV_clk = { - .name = "mAgicV_clk", - .pmc_mask = 1 << AT572D940HF_ID_MSIRQ0, - .type = CLK_TYPE_PERIPHERAL, -}; - - -static struct clk *periph_clocks[] __initdata = { - &pioA_clk, - &pioB_clk, - &pioC_clk, - &macb_clk, - &usart0_clk, - &usart1_clk, - &usart2_clk, - &mmc_clk, - &udc_clk, - &twi0_clk, - &spi0_clk, - &spi1_clk, - &ssc0_clk, - &ssc1_clk, - &ssc2_clk, - &tc0_clk, - &tc1_clk, - &tc2_clk, - &ohci_clk, - &ssc3_clk, - &twi1_clk, - &can0_clk, - &can1_clk, - &mAgicV_clk, - /* irq0 .. irq2 */ -}; - -/* - * The five programmable clocks. - * You must configure pin multiplexing to bring these signals out. - */ -static struct clk pck0 = { - .name = "pck0", - .pmc_mask = AT91_PMC_PCK0, - .type = CLK_TYPE_PROGRAMMABLE, - .id = 0, -}; -static struct clk pck1 = { - .name = "pck1", - .pmc_mask = AT91_PMC_PCK1, - .type = CLK_TYPE_PROGRAMMABLE, - .id = 1, -}; -static struct clk pck2 = { - .name = "pck2", - .pmc_mask = AT91_PMC_PCK2, - .type = CLK_TYPE_PROGRAMMABLE, - .id = 2, -}; -static struct clk pck3 = { - .name = "pck3", - .pmc_mask = AT91_PMC_PCK3, - .type = CLK_TYPE_PROGRAMMABLE, - .id = 3, -}; - -static struct clk mAgicV_mem_clk = { - .name = "mAgicV_mem_clk", - .pmc_mask = AT91_PMC_PCK4, - .type = CLK_TYPE_PROGRAMMABLE, - .id = 4, -}; - -/* HClocks */ -static struct clk hck0 = { - .name = "hck0", - .pmc_mask = AT91_PMC_HCK0, - .type = CLK_TYPE_SYSTEM, - .id = 0, -}; -static struct clk hck1 = { - .name = "hck1", - .pmc_mask = AT91_PMC_HCK1, - .type = CLK_TYPE_SYSTEM, - .id = 1, -}; - -static void __init at572d940hf_register_clocks(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(periph_clocks); i++) - clk_register(periph_clocks[i]); - - clk_register(&pck0); - clk_register(&pck1); - clk_register(&pck2); - clk_register(&pck3); - clk_register(&mAgicV_mem_clk); - - clk_register(&hck0); - clk_register(&hck1); -} - -/* -------------------------------------------------------------------- - * GPIO - * -------------------------------------------------------------------- */ - -static struct at91_gpio_bank at572d940hf_gpio[] = { - { - .id = AT572D940HF_ID_PIOA, - .offset = AT91_PIOA, - .clock = &pioA_clk, - }, { - .id = AT572D940HF_ID_PIOB, - .offset = AT91_PIOB, - .clock = &pioB_clk, - }, { - .id = AT572D940HF_ID_PIOC, - .offset = AT91_PIOC, - .clock = &pioC_clk, - } -}; - -static void at572d940hf_reset(void) -{ - at91_sys_write(AT91_RSTC_CR, AT91_RSTC_KEY | AT91_RSTC_PROCRST | AT91_RSTC_PERRST); -} - - -/* -------------------------------------------------------------------- - * AT572D940HF processor initialization - * -------------------------------------------------------------------- */ - -void __init at572d940hf_initialize(unsigned long main_clock) -{ - /* Map peripherals */ - iotable_init(at572d940hf_io_desc, ARRAY_SIZE(at572d940hf_io_desc)); - - at91_arch_reset = at572d940hf_reset; - at91_extern_irq = (1 << AT572D940HF_ID_IRQ0) | (1 << AT572D940HF_ID_IRQ1) - | (1 << AT572D940HF_ID_IRQ2); - - /* Init clock subsystem */ - at91_clock_init(main_clock); - - /* Register the processor-specific clocks */ - at572d940hf_register_clocks(); - - /* Register GPIO subsystem */ - at91_gpio_init(at572d940hf_gpio, 3); -} - -/* -------------------------------------------------------------------- - * Interrupt initialization - * -------------------------------------------------------------------- */ - -/* - * The default interrupt priority levels (0 = lowest, 7 = highest). - */ -static unsigned int at572d940hf_default_irq_priority[NR_AIC_IRQS] __initdata = { - 7, /* Advanced Interrupt Controller */ - 7, /* System Peripherals */ - 0, /* Parallel IO Controller A */ - 0, /* Parallel IO Controller B */ - 0, /* Parallel IO Controller C */ - 3, /* Ethernet */ - 6, /* USART 0 */ - 6, /* USART 1 */ - 6, /* USART 2 */ - 0, /* Multimedia Card Interface */ - 4, /* USB Device Port */ - 0, /* Two-Wire Interface 0 */ - 6, /* Serial Peripheral Interface 0 */ - 6, /* Serial Peripheral Interface 1 */ - 5, /* Serial Synchronous Controller 0 */ - 5, /* Serial Synchronous Controller 1 */ - 5, /* Serial Synchronous Controller 2 */ - 0, /* Timer Counter 0 */ - 0, /* Timer Counter 1 */ - 0, /* Timer Counter 2 */ - 3, /* USB Host port */ - 3, /* Serial Synchronous Controller 3 */ - 0, /* Two-Wire Interface 1 */ - 0, /* CAN Controller 0 */ - 0, /* CAN Controller 1 */ - 0, /* mAgicV HALT line */ - 0, /* mAgicV SIRQ0 line */ - 0, /* mAgicV exception line */ - 0, /* mAgicV end of DMA line */ - 0, /* Advanced Interrupt Controller */ - 0, /* Advanced Interrupt Controller */ - 0, /* Advanced Interrupt Controller */ -}; - -void __init at572d940hf_init_interrupts(unsigned int priority[NR_AIC_IRQS]) -{ - if (!priority) - priority = at572d940hf_default_irq_priority; - - /* Initialize the AIC interrupt controller */ - at91_aic_init(priority); - - /* Enable GPIO interrupts */ - at91_gpio_irq_setup(); -} - diff --git a/arch/arm/mach-at91/at572d940hf_devices.c b/arch/arm/mach-at91/at572d940hf_devices.c deleted file mode 100644 index 0fc20a240782..000000000000 --- a/arch/arm/mach-at91/at572d940hf_devices.c +++ /dev/null @@ -1,970 +0,0 @@ -/* - * arch/arm/mach-at91/at572d940hf_devices.c - * - * Copyright (C) 2008 Atmel Antonio R. Costa - * Copyright (C) 2005 Thibaut VARENE - * Copyright (C) 2005 David Brownell - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#include -#include - -#include -#include - -#include -#include -#include -#include -#include - -#include "generic.h" -#include "sam9_smc.h" - - -/* -------------------------------------------------------------------- - * USB Host - * -------------------------------------------------------------------- */ - -#if defined(CONFIG_USB_OHCI_HCD) || defined(CONFIG_USB_OHCI_HCD_MODULE) -static u64 ohci_dmamask = DMA_BIT_MASK(32); -static struct at91_usbh_data usbh_data; - -static struct resource usbh_resources[] = { - [0] = { - .start = AT572D940HF_UHP_BASE, - .end = AT572D940HF_UHP_BASE + SZ_1M - 1, - .flags = IORESOURCE_MEM, - }, - [1] = { - .start = AT572D940HF_ID_UHP, - .end = AT572D940HF_ID_UHP, - .flags = IORESOURCE_IRQ, - }, -}; - -static struct platform_device at572d940hf_usbh_device = { - .name = "at91_ohci", - .id = -1, - .dev = { - .dma_mask = &ohci_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(32), - .platform_data = &usbh_data, - }, - .resource = usbh_resources, - .num_resources = ARRAY_SIZE(usbh_resources), -}; - -void __init at91_add_device_usbh(struct at91_usbh_data *data) -{ - if (!data) - return; - - usbh_data = *data; - platform_device_register(&at572d940hf_usbh_device); - -} -#else -void __init at91_add_device_usbh(struct at91_usbh_data *data) {} -#endif - - -/* -------------------------------------------------------------------- - * USB Device (Gadget) - * -------------------------------------------------------------------- */ - -#ifdef CONFIG_USB_GADGET_AT91 -static struct at91_udc_data udc_data; - -static struct resource udc_resources[] = { - [0] = { - .start = AT572D940HF_BASE_UDP, - .end = AT572D940HF_BASE_UDP + SZ_16K - 1, - .flags = IORESOURCE_MEM, - }, - [1] = { - .start = AT572D940HF_ID_UDP, - .end = AT572D940HF_ID_UDP, - .flags = IORESOURCE_IRQ, - }, -}; - -static struct platform_device at572d940hf_udc_device = { - .name = "at91_udc", - .id = -1, - .dev = { - .platform_data = &udc_data, - }, - .resource = udc_resources, - .num_resources = ARRAY_SIZE(udc_resources), -}; - -void __init at91_add_device_udc(struct at91_udc_data *data) -{ - if (!data) - return; - - if (data->vbus_pin) { - at91_set_gpio_input(data->vbus_pin, 0); - at91_set_deglitch(data->vbus_pin, 1); - } - - /* Pullup pin is handled internally */ - - udc_data = *data; - platform_device_register(&at572d940hf_udc_device); -} -#else -void __init at91_add_device_udc(struct at91_udc_data *data) {} -#endif - - -/* -------------------------------------------------------------------- - * Ethernet - * -------------------------------------------------------------------- */ - -#if defined(CONFIG_MACB) || defined(CONFIG_MACB_MODULE) -static u64 eth_dmamask = DMA_BIT_MASK(32); -static struct at91_eth_data eth_data; - -static struct resource eth_resources[] = { - [0] = { - .start = AT572D940HF_BASE_EMAC, - .end = AT572D940HF_BASE_EMAC + SZ_16K - 1, - .flags = IORESOURCE_MEM, - }, - [1] = { - .start = AT572D940HF_ID_EMAC, - .end = AT572D940HF_ID_EMAC, - .flags = IORESOURCE_IRQ, - }, -}; - -static struct platform_device at572d940hf_eth_device = { - .name = "macb", - .id = -1, - .dev = { - .dma_mask = ð_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(32), - .platform_data = ð_data, - }, - .resource = eth_resources, - .num_resources = ARRAY_SIZE(eth_resources), -}; - -void __init at91_add_device_eth(struct at91_eth_data *data) -{ - if (!data) - return; - - if (data->phy_irq_pin) { - at91_set_gpio_input(data->phy_irq_pin, 0); - at91_set_deglitch(data->phy_irq_pin, 1); - } - - /* Only RMII is supported */ - data->is_rmii = 1; - - /* Pins used for RMII */ - at91_set_A_periph(AT91_PIN_PA16, 0); /* ETXCK_EREFCK */ - at91_set_A_periph(AT91_PIN_PA17, 0); /* ERXDV */ - at91_set_A_periph(AT91_PIN_PA18, 0); /* ERX0 */ - at91_set_A_periph(AT91_PIN_PA19, 0); /* ERX1 */ - at91_set_A_periph(AT91_PIN_PA20, 0); /* ERXER */ - at91_set_A_periph(AT91_PIN_PA23, 0); /* ETXEN */ - at91_set_A_periph(AT91_PIN_PA21, 0); /* ETX0 */ - at91_set_A_periph(AT91_PIN_PA22, 0); /* ETX1 */ - at91_set_A_periph(AT91_PIN_PA13, 0); /* EMDIO */ - at91_set_A_periph(AT91_PIN_PA14, 0); /* EMDC */ - - eth_data = *data; - platform_device_register(&at572d940hf_eth_device); -} -#else -void __init at91_add_device_eth(struct at91_eth_data *data) {} -#endif - - -/* -------------------------------------------------------------------- - * MMC / SD - * -------------------------------------------------------------------- */ - -#if defined(CONFIG_MMC_AT91) || defined(CONFIG_MMC_AT91_MODULE) -static u64 mmc_dmamask = DMA_BIT_MASK(32); -static struct at91_mmc_data mmc_data; - -static struct resource mmc_resources[] = { - [0] = { - .start = AT572D940HF_BASE_MCI, - .end = AT572D940HF_BASE_MCI + SZ_16K - 1, - .flags = IORESOURCE_MEM, - }, - [1] = { - .start = AT572D940HF_ID_MCI, - .end = AT572D940HF_ID_MCI, - .flags = IORESOURCE_IRQ, - }, -}; - -static struct platform_device at572d940hf_mmc_device = { - .name = "at91_mci", - .id = -1, - .dev = { - .dma_mask = &mmc_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(32), - .platform_data = &mmc_data, - }, - .resource = mmc_resources, - .num_resources = ARRAY_SIZE(mmc_resources), -}; - -void __init at91_add_device_mmc(short mmc_id, struct at91_mmc_data *data) -{ - if (!data) - return; - - /* input/irq */ - if (data->det_pin) { - at91_set_gpio_input(data->det_pin, 1); - at91_set_deglitch(data->det_pin, 1); - } - if (data->wp_pin) - at91_set_gpio_input(data->wp_pin, 1); - if (data->vcc_pin) - at91_set_gpio_output(data->vcc_pin, 0); - - /* CLK */ - at91_set_A_periph(AT91_PIN_PC22, 0); - - /* CMD */ - at91_set_A_periph(AT91_PIN_PC23, 1); - - /* DAT0, maybe DAT1..DAT3 */ - at91_set_A_periph(AT91_PIN_PC24, 1); - if (data->wire4) { - at91_set_A_periph(AT91_PIN_PC25, 1); - at91_set_A_periph(AT91_PIN_PC26, 1); - at91_set_A_periph(AT91_PIN_PC27, 1); - } - - mmc_data = *data; - platform_device_register(&at572d940hf_mmc_device); -} -#else -void __init at91_add_device_mmc(short mmc_id, struct at91_mmc_data *data) {} -#endif - - -/* -------------------------------------------------------------------- - * NAND / SmartMedia - * -------------------------------------------------------------------- */ - -#if defined(CONFIG_MTD_NAND_ATMEL) || defined(CONFIG_MTD_NAND_ATMEL_MODULE) -static struct atmel_nand_data nand_data; - -#define NAND_BASE AT91_CHIPSELECT_3 - -static struct resource nand_resources[] = { - { - .start = NAND_BASE, - .end = NAND_BASE + SZ_256M - 1, - .flags = IORESOURCE_MEM, - } -}; - -static struct platform_device at572d940hf_nand_device = { - .name = "atmel_nand", - .id = -1, - .dev = { - .platform_data = &nand_data, - }, - .resource = nand_resources, - .num_resources = ARRAY_SIZE(nand_resources), -}; - -void __init at91_add_device_nand(struct atmel_nand_data *data) -{ - unsigned long csa; - - if (!data) - return; - - csa = at91_sys_read(AT91_MATRIX_EBICSA); - at91_sys_write(AT91_MATRIX_EBICSA, csa | AT91_MATRIX_CS3A_SMC_SMARTMEDIA); - - /* enable pin */ - if (data->enable_pin) - at91_set_gpio_output(data->enable_pin, 1); - - /* ready/busy pin */ - if (data->rdy_pin) - at91_set_gpio_input(data->rdy_pin, 1); - - /* card detect pin */ - if (data->det_pin) - at91_set_gpio_input(data->det_pin, 1); - - at91_set_A_periph(AT91_PIN_PB28, 0); /* A[22] */ - at91_set_B_periph(AT91_PIN_PA28, 0); /* NANDOE */ - at91_set_B_periph(AT91_PIN_PA29, 0); /* NANDWE */ - - nand_data = *data; - platform_device_register(&at572d940hf_nand_device); -} - -#else -void __init at91_add_device_nand(struct atmel_nand_data *data) {} -#endif - - -/* -------------------------------------------------------------------- - * TWI (i2c) - * -------------------------------------------------------------------- */ - -/* - * Prefer the GPIO code since the TWI controller isn't robust - * (gets overruns and underruns under load) and can only issue - * repeated STARTs in one scenario (the driver doesn't yet handle them). - */ - -#if defined(CONFIG_I2C_GPIO) || defined(CONFIG_I2C_GPIO_MODULE) - -static struct i2c_gpio_platform_data pdata = { - .sda_pin = AT91_PIN_PC7, - .sda_is_open_drain = 1, - .scl_pin = AT91_PIN_PC8, - .scl_is_open_drain = 1, - .udelay = 2, /* ~100 kHz */ -}; - -static struct platform_device at572d940hf_twi_device { - .name = "i2c-gpio", - .id = -1, - .dev.platform_data = &pdata, -}; - -void __init at91_add_device_i2c(struct i2c_board_info *devices, int nr_devices) -{ - at91_set_GPIO_periph(AT91_PIN_PC7, 1); /* TWD (SDA) */ - at91_set_multi_drive(AT91_PIN_PC7, 1); - - at91_set_GPIO_periph(AT91_PIN_PA8, 1); /* TWCK (SCL) */ - at91_set_multi_drive(AT91_PIN_PC8, 1); - - i2c_register_board_info(0, devices, nr_devices); - platform_device_register(&at572d940hf_twi_device); -} - -#elif defined(CONFIG_I2C_AT91) || defined(CONFIG_I2C_AT91_MODULE) - -static struct resource twi0_resources[] = { - [0] = { - .start = AT572D940HF_BASE_TWI0, - .end = AT572D940HF_BASE_TWI0 + SZ_16K - 1, - .flags = IORESOURCE_MEM, - }, - [1] = { - .start = AT572D940HF_ID_TWI0, - .end = AT572D940HF_ID_TWI0, - .flags = IORESOURCE_IRQ, - }, -}; - -static struct platform_device at572d940hf_twi0_device = { - .name = "at91_i2c", - .id = 0, - .resource = twi0_resources, - .num_resources = ARRAY_SIZE(twi0_resources), -}; - -static struct resource twi1_resources[] = { - [0] = { - .start = AT572D940HF_BASE_TWI1, - .end = AT572D940HF_BASE_TWI1 + SZ_16K - 1, - .flags = IORESOURCE_MEM, - }, - [1] = { - .start = AT572D940HF_ID_TWI1, - .end = AT572D940HF_ID_TWI1, - .flags = IORESOURCE_IRQ, - }, -}; - -static struct platform_device at572d940hf_twi1_device = { - .name = "at91_i2c", - .id = 1, - .resource = twi1_resources, - .num_resources = ARRAY_SIZE(twi1_resources), -}; - -void __init at91_add_device_i2c(struct i2c_board_info *devices, int nr_devices) -{ - /* pins used for TWI0 interface */ - at91_set_A_periph(AT91_PIN_PC7, 0); /* TWD */ - at91_set_multi_drive(AT91_PIN_PC7, 1); - - at91_set_A_periph(AT91_PIN_PC8, 0); /* TWCK */ - at91_set_multi_drive(AT91_PIN_PC8, 1); - - /* pins used for TWI1 interface */ - at91_set_A_periph(AT91_PIN_PC20, 0); /* TWD */ - at91_set_multi_drive(AT91_PIN_PC20, 1); - - at91_set_A_periph(AT91_PIN_PC21, 0); /* TWCK */ - at91_set_multi_drive(AT91_PIN_PC21, 1); - - i2c_register_board_info(0, devices, nr_devices); - platform_device_register(&at572d940hf_twi0_device); - platform_device_register(&at572d940hf_twi1_device); -} -#else -void __init at91_add_device_i2c(struct i2c_board_info *devices, int nr_devices) {} -#endif - - -/* -------------------------------------------------------------------- - * SPI - * -------------------------------------------------------------------- */ - -#if defined(CONFIG_SPI_ATMEL) || defined(CONFIG_SPI_ATMEL_MODULE) -static u64 spi_dmamask = DMA_BIT_MASK(32); - -static struct resource spi0_resources[] = { - [0] = { - .start = AT572D940HF_BASE_SPI0, - .end = AT572D940HF_BASE_SPI0 + SZ_16K - 1, - .flags = IORESOURCE_MEM, - }, - [1] = { - .start = AT572D940HF_ID_SPI0, - .end = AT572D940HF_ID_SPI0, - .flags = IORESOURCE_IRQ, - }, -}; - -static struct platform_device at572d940hf_spi0_device = { - .name = "atmel_spi", - .id = 0, - .dev = { - .dma_mask = &spi_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(32), - }, - .resource = spi0_resources, - .num_resources = ARRAY_SIZE(spi0_resources), -}; - -static const unsigned spi0_standard_cs[4] = { AT91_PIN_PA3, AT91_PIN_PA4, AT91_PIN_PA5, AT91_PIN_PA6 }; - -static struct resource spi1_resources[] = { - [0] = { - .start = AT572D940HF_BASE_SPI1, - .end = AT572D940HF_BASE_SPI1 + SZ_16K - 1, - .flags = IORESOURCE_MEM, - }, - [1] = { - .start = AT572D940HF_ID_SPI1, - .end = AT572D940HF_ID_SPI1, - .flags = IORESOURCE_IRQ, - }, -}; - -static struct platform_device at572d940hf_spi1_device = { - .name = "atmel_spi", - .id = 1, - .dev = { - .dma_mask = &spi_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(32), - }, - .resource = spi1_resources, - .num_resources = ARRAY_SIZE(spi1_resources), -}; - -static const unsigned spi1_standard_cs[4] = { AT91_PIN_PC3, AT91_PIN_PC4, AT91_PIN_PC5, AT91_PIN_PC6 }; - -void __init at91_add_device_spi(struct spi_board_info *devices, int nr_devices) -{ - int i; - unsigned long cs_pin; - short enable_spi0 = 0; - short enable_spi1 = 0; - - /* Choose SPI chip-selects */ - for (i = 0; i < nr_devices; i++) { - if (devices[i].controller_data) - cs_pin = (unsigned long) devices[i].controller_data; - else if (devices[i].bus_num == 0) - cs_pin = spi0_standard_cs[devices[i].chip_select]; - else - cs_pin = spi1_standard_cs[devices[i].chip_select]; - - if (devices[i].bus_num == 0) - enable_spi0 = 1; - else - enable_spi1 = 1; - - /* enable chip-select pin */ - at91_set_gpio_output(cs_pin, 1); - - /* pass chip-select pin to driver */ - devices[i].controller_data = (void *) cs_pin; - } - - spi_register_board_info(devices, nr_devices); - - /* Configure SPI bus(es) */ - if (enable_spi0) { - at91_set_A_periph(AT91_PIN_PA0, 0); /* SPI0_MISO */ - at91_set_A_periph(AT91_PIN_PA1, 0); /* SPI0_MOSI */ - at91_set_A_periph(AT91_PIN_PA2, 0); /* SPI0_SPCK */ - - at91_clock_associate("spi0_clk", &at572d940hf_spi0_device.dev, "spi_clk"); - platform_device_register(&at572d940hf_spi0_device); - } - if (enable_spi1) { - at91_set_A_periph(AT91_PIN_PC0, 0); /* SPI1_MISO */ - at91_set_A_periph(AT91_PIN_PC1, 0); /* SPI1_MOSI */ - at91_set_A_periph(AT91_PIN_PC2, 0); /* SPI1_SPCK */ - - at91_clock_associate("spi1_clk", &at572d940hf_spi1_device.dev, "spi_clk"); - platform_device_register(&at572d940hf_spi1_device); - } -} -#else -void __init at91_add_device_spi(struct spi_board_info *devices, int nr_devices) {} -#endif - - -/* -------------------------------------------------------------------- - * Timer/Counter blocks - * -------------------------------------------------------------------- */ - -#ifdef CONFIG_ATMEL_TCLIB - -static struct resource tcb_resources[] = { - [0] = { - .start = AT572D940HF_BASE_TCB, - .end = AT572D940HF_BASE_TCB + SZ_16K - 1, - .flags = IORESOURCE_MEM, - }, - [1] = { - .start = AT572D940HF_ID_TC0, - .end = AT572D940HF_ID_TC0, - .flags = IORESOURCE_IRQ, - }, - [2] = { - .start = AT572D940HF_ID_TC1, - .end = AT572D940HF_ID_TC1, - .flags = IORESOURCE_IRQ, - }, - [3] = { - .start = AT572D940HF_ID_TC2, - .end = AT572D940HF_ID_TC2, - .flags = IORESOURCE_IRQ, - }, -}; - -static struct platform_device at572d940hf_tcb_device = { - .name = "atmel_tcb", - .id = 0, - .resource = tcb_resources, - .num_resources = ARRAY_SIZE(tcb_resources), -}; - -static void __init at91_add_device_tc(void) -{ - /* this chip has a separate clock and irq for each TC channel */ - at91_clock_associate("tc0_clk", &at572d940hf_tcb_device.dev, "t0_clk"); - at91_clock_associate("tc1_clk", &at572d940hf_tcb_device.dev, "t1_clk"); - at91_clock_associate("tc2_clk", &at572d940hf_tcb_device.dev, "t2_clk"); - platform_device_register(&at572d940hf_tcb_device); -} -#else -static void __init at91_add_device_tc(void) { } -#endif - - -/* -------------------------------------------------------------------- - * RTT - * -------------------------------------------------------------------- */ - -static struct resource rtt_resources[] = { - { - .start = AT91_BASE_SYS + AT91_RTT, - .end = AT91_BASE_SYS + AT91_RTT + SZ_16 - 1, - .flags = IORESOURCE_MEM, - } -}; - -static struct platform_device at572d940hf_rtt_device = { - .name = "at91_rtt", - .id = 0, - .resource = rtt_resources, - .num_resources = ARRAY_SIZE(rtt_resources), -}; - -static void __init at91_add_device_rtt(void) -{ - platform_device_register(&at572d940hf_rtt_device); -} - - -/* -------------------------------------------------------------------- - * Watchdog - * -------------------------------------------------------------------- */ - -#if defined(CONFIG_AT91SAM9X_WATCHDOG) || defined(CONFIG_AT91SAM9X_WATCHDOG_MODULE) -static struct platform_device at572d940hf_wdt_device = { - .name = "at91_wdt", - .id = -1, - .num_resources = 0, -}; - -static void __init at91_add_device_watchdog(void) -{ - platform_device_register(&at572d940hf_wdt_device); -} -#else -static void __init at91_add_device_watchdog(void) {} -#endif - - -/* -------------------------------------------------------------------- - * UART - * -------------------------------------------------------------------- */ - -#if defined(CONFIG_SERIAL_ATMEL) -static struct resource dbgu_resources[] = { - [0] = { - .start = AT91_VA_BASE_SYS + AT91_DBGU, - .end = AT91_VA_BASE_SYS + AT91_DBGU + SZ_512 - 1, - .flags = IORESOURCE_MEM, - }, - [1] = { - .start = AT91_ID_SYS, - .end = AT91_ID_SYS, - .flags = IORESOURCE_IRQ, - }, -}; - -static struct atmel_uart_data dbgu_data = { - .use_dma_tx = 0, - .use_dma_rx = 0, /* DBGU not capable of receive DMA */ - .regs = (void __iomem *)(AT91_VA_BASE_SYS + AT91_DBGU), -}; - -static u64 dbgu_dmamask = DMA_BIT_MASK(32); - -static struct platform_device at572d940hf_dbgu_device = { - .name = "atmel_usart", - .id = 0, - .dev = { - .dma_mask = &dbgu_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(32), - .platform_data = &dbgu_data, - }, - .resource = dbgu_resources, - .num_resources = ARRAY_SIZE(dbgu_resources), -}; - -static inline void configure_dbgu_pins(void) -{ - at91_set_A_periph(AT91_PIN_PC31, 1); /* DTXD */ - at91_set_A_periph(AT91_PIN_PC30, 0); /* DRXD */ -} - -static struct resource uart0_resources[] = { - [0] = { - .start = AT572D940HF_BASE_US0, - .end = AT572D940HF_BASE_US0 + SZ_16K - 1, - .flags = IORESOURCE_MEM, - }, - [1] = { - .start = AT572D940HF_ID_US0, - .end = AT572D940HF_ID_US0, - .flags = IORESOURCE_IRQ, - }, -}; - -static struct atmel_uart_data uart0_data = { - .use_dma_tx = 1, - .use_dma_rx = 1, -}; - -static u64 uart0_dmamask = DMA_BIT_MASK(32); - -static struct platform_device at572d940hf_uart0_device = { - .name = "atmel_usart", - .id = 1, - .dev = { - .dma_mask = &uart0_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(32), - .platform_data = &uart0_data, - }, - .resource = uart0_resources, - .num_resources = ARRAY_SIZE(uart0_resources), -}; - -static inline void configure_usart0_pins(unsigned pins) -{ - at91_set_A_periph(AT91_PIN_PA8, 1); /* TXD0 */ - at91_set_A_periph(AT91_PIN_PA7, 0); /* RXD0 */ - - if (pins & ATMEL_UART_RTS) - at91_set_A_periph(AT91_PIN_PA10, 0); /* RTS0 */ - if (pins & ATMEL_UART_CTS) - at91_set_A_periph(AT91_PIN_PA9, 0); /* CTS0 */ -} - -static struct resource uart1_resources[] = { - [0] = { - .start = AT572D940HF_BASE_US1, - .end = AT572D940HF_BASE_US1 + SZ_16K - 1, - .flags = IORESOURCE_MEM, - }, - [1] = { - .start = AT572D940HF_ID_US1, - .end = AT572D940HF_ID_US1, - .flags = IORESOURCE_IRQ, - }, -}; - -static struct atmel_uart_data uart1_data = { - .use_dma_tx = 1, - .use_dma_rx = 1, -}; - -static u64 uart1_dmamask = DMA_BIT_MASK(32); - -static struct platform_device at572d940hf_uart1_device = { - .name = "atmel_usart", - .id = 2, - .dev = { - .dma_mask = &uart1_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(32), - .platform_data = &uart1_data, - }, - .resource = uart1_resources, - .num_resources = ARRAY_SIZE(uart1_resources), -}; - -static inline void configure_usart1_pins(unsigned pins) -{ - at91_set_A_periph(AT91_PIN_PC10, 1); /* TXD1 */ - at91_set_A_periph(AT91_PIN_PC9 , 0); /* RXD1 */ - - if (pins & ATMEL_UART_RTS) - at91_set_A_periph(AT91_PIN_PC12, 0); /* RTS1 */ - if (pins & ATMEL_UART_CTS) - at91_set_A_periph(AT91_PIN_PC11, 0); /* CTS1 */ -} - -static struct resource uart2_resources[] = { - [0] = { - .start = AT572D940HF_BASE_US2, - .end = AT572D940HF_BASE_US2 + SZ_16K - 1, - .flags = IORESOURCE_MEM, - }, - [1] = { - .start = AT572D940HF_ID_US2, - .end = AT572D940HF_ID_US2, - .flags = IORESOURCE_IRQ, - }, -}; - -static struct atmel_uart_data uart2_data = { - .use_dma_tx = 1, - .use_dma_rx = 1, -}; - -static u64 uart2_dmamask = DMA_BIT_MASK(32); - -static struct platform_device at572d940hf_uart2_device = { - .name = "atmel_usart", - .id = 3, - .dev = { - .dma_mask = &uart2_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(32), - .platform_data = &uart2_data, - }, - .resource = uart2_resources, - .num_resources = ARRAY_SIZE(uart2_resources), -}; - -static inline void configure_usart2_pins(unsigned pins) -{ - at91_set_A_periph(AT91_PIN_PC15, 1); /* TXD2 */ - at91_set_A_periph(AT91_PIN_PC14, 0); /* RXD2 */ - - if (pins & ATMEL_UART_RTS) - at91_set_A_periph(AT91_PIN_PC17, 0); /* RTS2 */ - if (pins & ATMEL_UART_CTS) - at91_set_A_periph(AT91_PIN_PC16, 0); /* CTS2 */ -} - -static struct platform_device *__initdata at91_uarts[ATMEL_MAX_UART]; /* the UARTs to use */ -struct platform_device *atmel_default_console_device; /* the serial console device */ - -void __init at91_register_uart(unsigned id, unsigned portnr, unsigned pins) -{ - struct platform_device *pdev; - - switch (id) { - case 0: /* DBGU */ - pdev = &at572d940hf_dbgu_device; - configure_dbgu_pins(); - at91_clock_associate("mck", &pdev->dev, "usart"); - break; - case AT572D940HF_ID_US0: - pdev = &at572d940hf_uart0_device; - configure_usart0_pins(pins); - at91_clock_associate("usart0_clk", &pdev->dev, "usart"); - break; - case AT572D940HF_ID_US1: - pdev = &at572d940hf_uart1_device; - configure_usart1_pins(pins); - at91_clock_associate("usart1_clk", &pdev->dev, "usart"); - break; - case AT572D940HF_ID_US2: - pdev = &at572d940hf_uart2_device; - configure_usart2_pins(pins); - at91_clock_associate("usart2_clk", &pdev->dev, "usart"); - break; - default: - return; - } - pdev->id = portnr; /* update to mapped ID */ - - if (portnr < ATMEL_MAX_UART) - at91_uarts[portnr] = pdev; -} - -void __init at91_set_serial_console(unsigned portnr) -{ - if (portnr < ATMEL_MAX_UART) - atmel_default_console_device = at91_uarts[portnr]; -} - -void __init at91_add_device_serial(void) -{ - int i; - - for (i = 0; i < ATMEL_MAX_UART; i++) { - if (at91_uarts[i]) - platform_device_register(at91_uarts[i]); - } - - if (!atmel_default_console_device) - printk(KERN_INFO "AT91: No default serial console defined.\n"); -} - -#else -void __init at91_register_uart(unsigned id, unsigned portnr, unsigned pins) {} -void __init at91_set_serial_console(unsigned portnr) {} -void __init at91_add_device_serial(void) {} -#endif - - -/* -------------------------------------------------------------------- - * mAgic - * -------------------------------------------------------------------- */ - -#ifdef CONFIG_MAGICV -static struct resource mAgic_resources[] = { - { - .start = AT91_MAGIC_PM_BASE, - .end = AT91_MAGIC_PM_BASE + AT91_MAGIC_PM_SIZE - 1, - .flags = IORESOURCE_MEM, - }, - { - .start = AT91_MAGIC_DM_I_BASE, - .end = AT91_MAGIC_DM_I_BASE + AT91_MAGIC_DM_I_SIZE - 1, - .flags = IORESOURCE_MEM, - }, - { - .start = AT91_MAGIC_DM_F_BASE, - .end = AT91_MAGIC_DM_F_BASE + AT91_MAGIC_DM_F_SIZE - 1, - .flags = IORESOURCE_MEM, - }, - { - .start = AT91_MAGIC_DM_DB_BASE, - .end = AT91_MAGIC_DM_DB_BASE + AT91_MAGIC_DM_DB_SIZE - 1, - .flags = IORESOURCE_MEM, - }, - { - .start = AT91_MAGIC_REGS_BASE, - .end = AT91_MAGIC_REGS_BASE + AT91_MAGIC_REGS_SIZE - 1, - .flags = IORESOURCE_MEM, - }, - { - .start = AT91_MAGIC_EXTPAGE_BASE, - .end = AT91_MAGIC_EXTPAGE_BASE + AT91_MAGIC_EXTPAGE_SIZE - 1, - .flags = IORESOURCE_MEM, - }, - { - .start = AT572D940HF_ID_MSIRQ0, - .end = AT572D940HF_ID_MSIRQ0, - .flags = IORESOURCE_IRQ, - }, - { - .start = AT572D940HF_ID_MHALT, - .end = AT572D940HF_ID_MHALT, - .flags = IORESOURCE_IRQ, - }, - { - .start = AT572D940HF_ID_MEXC, - .end = AT572D940HF_ID_MEXC, - .flags = IORESOURCE_IRQ, - }, - { - .start = AT572D940HF_ID_MEDMA, - .end = AT572D940HF_ID_MEDMA, - .flags = IORESOURCE_IRQ, - }, -}; - -static struct platform_device mAgic_device = { - .name = "mAgic", - .id = -1, - .num_resources = ARRAY_SIZE(mAgic_resources), - .resource = mAgic_resources, -}; - -void __init at91_add_device_mAgic(void) -{ - platform_device_register(&mAgic_device); -} -#else -void __init at91_add_device_mAgic(void) {} -#endif - - -/* -------------------------------------------------------------------- */ - -/* - * These devices are always present and don't need any board-specific - * setup. - */ -static int __init at91_add_standard_devices(void) -{ - at91_add_device_rtt(); - at91_add_device_watchdog(); - at91_add_device_tc(); - return 0; -} - -arch_initcall(at91_add_standard_devices); diff --git a/arch/arm/mach-at91/at91cap9.c b/arch/arm/mach-at91/at91cap9.c index 73376170fb91..17fae4a42ab5 100644 --- a/arch/arm/mach-at91/at91cap9.c +++ b/arch/arm/mach-at91/at91cap9.c @@ -222,6 +222,25 @@ static struct clk *periph_clocks[] __initdata = { // irq0 .. irq1 }; +static struct clk_lookup periph_clocks_lookups[] = { + CLKDEV_CON_DEV_ID("hclk", "atmel_usba_udc.0", &utmi_clk), + CLKDEV_CON_DEV_ID("pclk", "atmel_usba_udc.0", &udphs_clk), + CLKDEV_CON_DEV_ID("mci_clk", "at91_mci.0", &mmc0_clk), + CLKDEV_CON_DEV_ID("mci_clk", "at91_mci.1", &mmc1_clk), + CLKDEV_CON_DEV_ID("spi_clk", "atmel_spi.0", &spi0_clk), + CLKDEV_CON_DEV_ID("spi_clk", "atmel_spi.1", &spi1_clk), + CLKDEV_CON_DEV_ID("t0_clk", "atmel_tcb.0", &tcb_clk), + CLKDEV_CON_DEV_ID("ssc", "ssc.0", &ssc0_clk), + CLKDEV_CON_DEV_ID("ssc", "ssc.1", &ssc1_clk), +}; + +static struct clk_lookup usart_clocks_lookups[] = { + CLKDEV_CON_DEV_ID("usart", "atmel_usart.0", &mck), + CLKDEV_CON_DEV_ID("usart", "atmel_usart.1", &usart0_clk), + CLKDEV_CON_DEV_ID("usart", "atmel_usart.2", &usart1_clk), + CLKDEV_CON_DEV_ID("usart", "atmel_usart.3", &usart2_clk), +}; + /* * The four programmable clocks. * You must configure pin multiplexing to bring these signals out. @@ -258,12 +277,29 @@ static void __init at91cap9_register_clocks(void) for (i = 0; i < ARRAY_SIZE(periph_clocks); i++) clk_register(periph_clocks[i]); + clkdev_add_table(periph_clocks_lookups, + ARRAY_SIZE(periph_clocks_lookups)); + clkdev_add_table(usart_clocks_lookups, + ARRAY_SIZE(usart_clocks_lookups)); + clk_register(&pck0); clk_register(&pck1); clk_register(&pck2); clk_register(&pck3); } +static struct clk_lookup console_clock_lookup; + +void __init at91cap9_set_console_clock(int id) +{ + if (id >= ARRAY_SIZE(usart_clocks_lookups)) + return; + + console_clock_lookup.con_id = "usart"; + console_clock_lookup.clk = usart_clocks_lookups[id].clk; + clkdev_add(&console_clock_lookup); +} + /* -------------------------------------------------------------------- * GPIO * -------------------------------------------------------------------- */ @@ -303,11 +339,14 @@ static void at91cap9_poweroff(void) * AT91CAP9 processor initialization * -------------------------------------------------------------------- */ -void __init at91cap9_initialize(unsigned long main_clock) +void __init at91cap9_map_io(void) { /* Map peripherals */ iotable_init(at91cap9_io_desc, ARRAY_SIZE(at91cap9_io_desc)); +} +void __init at91cap9_initialize(unsigned long main_clock) +{ at91_arch_reset = at91cap9_reset; pm_power_off = at91cap9_poweroff; at91_extern_irq = (1 << AT91CAP9_ID_IRQ0) | (1 << AT91CAP9_ID_IRQ1); diff --git a/arch/arm/mach-at91/at91cap9_devices.c b/arch/arm/mach-at91/at91cap9_devices.c index 9ffbf3a2dfea..cd850ed6f335 100644 --- a/arch/arm/mach-at91/at91cap9_devices.c +++ b/arch/arm/mach-at91/at91cap9_devices.c @@ -171,7 +171,7 @@ void __init at91_add_device_usba(struct usba_platform_data *data) */ usba_udc_data.pdata.vbus_pin = -EINVAL; usba_udc_data.pdata.num_ep = ARRAY_SIZE(usba_udc_ep); - memcpy(usba_udc_data.ep, usba_udc_ep, sizeof(usba_udc_ep));; + memcpy(usba_udc_data.ep, usba_udc_ep, sizeof(usba_udc_ep)); if (data && data->vbus_pin > 0) { at91_set_gpio_input(data->vbus_pin, 0); @@ -181,10 +181,6 @@ void __init at91_add_device_usba(struct usba_platform_data *data) /* Pullup pin is handled internally by USB device peripheral */ - /* Clocks */ - at91_clock_associate("utmi_clk", &at91_usba_udc_device.dev, "hclk"); - at91_clock_associate("udphs_clk", &at91_usba_udc_device.dev, "pclk"); - platform_device_register(&at91_usba_udc_device); } #else @@ -355,7 +351,6 @@ void __init at91_add_device_mmc(short mmc_id, struct at91_mmc_data *data) } mmc0_data = *data; - at91_clock_associate("mci0_clk", &at91cap9_mmc0_device.dev, "mci_clk"); platform_device_register(&at91cap9_mmc0_device); } else { /* MCI1 */ /* CLK */ @@ -373,7 +368,6 @@ void __init at91_add_device_mmc(short mmc_id, struct at91_mmc_data *data) } mmc1_data = *data; - at91_clock_associate("mci1_clk", &at91cap9_mmc1_device.dev, "mci_clk"); platform_device_register(&at91cap9_mmc1_device); } } @@ -614,7 +608,6 @@ void __init at91_add_device_spi(struct spi_board_info *devices, int nr_devices) at91_set_B_periph(AT91_PIN_PA1, 0); /* SPI0_MOSI */ at91_set_B_periph(AT91_PIN_PA2, 0); /* SPI0_SPCK */ - at91_clock_associate("spi0_clk", &at91cap9_spi0_device.dev, "spi_clk"); platform_device_register(&at91cap9_spi0_device); } if (enable_spi1) { @@ -622,7 +615,6 @@ void __init at91_add_device_spi(struct spi_board_info *devices, int nr_devices) at91_set_A_periph(AT91_PIN_PB13, 0); /* SPI1_MOSI */ at91_set_A_periph(AT91_PIN_PB14, 0); /* SPI1_SPCK */ - at91_clock_associate("spi1_clk", &at91cap9_spi1_device.dev, "spi_clk"); platform_device_register(&at91cap9_spi1_device); } } @@ -659,8 +651,6 @@ static struct platform_device at91cap9_tcb_device = { static void __init at91_add_device_tc(void) { - /* this chip has one clock and irq for all three TC channels */ - at91_clock_associate("tcb_clk", &at91cap9_tcb_device.dev, "t0_clk"); platform_device_register(&at91cap9_tcb_device); } #else @@ -1001,12 +991,10 @@ void __init at91_add_device_ssc(unsigned id, unsigned pins) case AT91CAP9_ID_SSC0: pdev = &at91cap9_ssc0_device; configure_ssc0_pins(pins); - at91_clock_associate("ssc0_clk", &pdev->dev, "ssc"); break; case AT91CAP9_ID_SSC1: pdev = &at91cap9_ssc1_device; configure_ssc1_pins(pins); - at91_clock_associate("ssc1_clk", &pdev->dev, "ssc"); break; default: return; @@ -1199,32 +1187,30 @@ struct platform_device *atmel_default_console_device; /* the serial console devi void __init at91_register_uart(unsigned id, unsigned portnr, unsigned pins) { struct platform_device *pdev; + struct atmel_uart_data *pdata; switch (id) { case 0: /* DBGU */ pdev = &at91cap9_dbgu_device; configure_dbgu_pins(); - at91_clock_associate("mck", &pdev->dev, "usart"); break; case AT91CAP9_ID_US0: pdev = &at91cap9_uart0_device; configure_usart0_pins(pins); - at91_clock_associate("usart0_clk", &pdev->dev, "usart"); break; case AT91CAP9_ID_US1: pdev = &at91cap9_uart1_device; configure_usart1_pins(pins); - at91_clock_associate("usart1_clk", &pdev->dev, "usart"); break; case AT91CAP9_ID_US2: pdev = &at91cap9_uart2_device; configure_usart2_pins(pins); - at91_clock_associate("usart2_clk", &pdev->dev, "usart"); break; default: return; } - pdev->id = portnr; /* update to mapped ID */ + pdata = pdev->dev.platform_data; + pdata->num = portnr; /* update to mapped ID */ if (portnr < ATMEL_MAX_UART) at91_uarts[portnr] = pdev; @@ -1232,8 +1218,10 @@ void __init at91_register_uart(unsigned id, unsigned portnr, unsigned pins) void __init at91_set_serial_console(unsigned portnr) { - if (portnr < ATMEL_MAX_UART) + if (portnr < ATMEL_MAX_UART) { atmel_default_console_device = at91_uarts[portnr]; + at91cap9_set_console_clock(portnr); + } } void __init at91_add_device_serial(void) diff --git a/arch/arm/mach-at91/at91rm9200.c b/arch/arm/mach-at91/at91rm9200.c index 2e9ecad97f3d..b228ce9e21a1 100644 --- a/arch/arm/mach-at91/at91rm9200.c +++ b/arch/arm/mach-at91/at91rm9200.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "generic.h" #include "clock.h" @@ -191,6 +192,26 @@ static struct clk *periph_clocks[] __initdata = { // irq0 .. irq6 }; +static struct clk_lookup periph_clocks_lookups[] = { + CLKDEV_CON_DEV_ID("t0_clk", "atmel_tcb.0", &tc0_clk), + CLKDEV_CON_DEV_ID("t1_clk", "atmel_tcb.0", &tc1_clk), + CLKDEV_CON_DEV_ID("t2_clk", "atmel_tcb.0", &tc2_clk), + CLKDEV_CON_DEV_ID("t0_clk", "atmel_tcb.1", &tc3_clk), + CLKDEV_CON_DEV_ID("t1_clk", "atmel_tcb.1", &tc4_clk), + CLKDEV_CON_DEV_ID("t2_clk", "atmel_tcb.1", &tc5_clk), + CLKDEV_CON_DEV_ID("ssc", "ssc.0", &ssc0_clk), + CLKDEV_CON_DEV_ID("ssc", "ssc.1", &ssc1_clk), + CLKDEV_CON_DEV_ID("ssc", "ssc.2", &ssc2_clk), +}; + +static struct clk_lookup usart_clocks_lookups[] = { + CLKDEV_CON_DEV_ID("usart", "atmel_usart.0", &mck), + CLKDEV_CON_DEV_ID("usart", "atmel_usart.1", &usart0_clk), + CLKDEV_CON_DEV_ID("usart", "atmel_usart.2", &usart1_clk), + CLKDEV_CON_DEV_ID("usart", "atmel_usart.3", &usart2_clk), + CLKDEV_CON_DEV_ID("usart", "atmel_usart.4", &usart3_clk), +}; + /* * The four programmable clocks. * You must configure pin multiplexing to bring these signals out. @@ -227,12 +248,29 @@ static void __init at91rm9200_register_clocks(void) for (i = 0; i < ARRAY_SIZE(periph_clocks); i++) clk_register(periph_clocks[i]); + clkdev_add_table(periph_clocks_lookups, + ARRAY_SIZE(periph_clocks_lookups)); + clkdev_add_table(usart_clocks_lookups, + ARRAY_SIZE(usart_clocks_lookups)); + clk_register(&pck0); clk_register(&pck1); clk_register(&pck2); clk_register(&pck3); } +static struct clk_lookup console_clock_lookup; + +void __init at91rm9200_set_console_clock(int id) +{ + if (id >= ARRAY_SIZE(usart_clocks_lookups)) + return; + + console_clock_lookup.con_id = "usart"; + console_clock_lookup.clk = usart_clocks_lookups[id].clk; + clkdev_add(&console_clock_lookup); +} + /* -------------------------------------------------------------------- * GPIO * -------------------------------------------------------------------- */ @@ -266,15 +304,25 @@ static void at91rm9200_reset(void) at91_sys_write(AT91_ST_CR, AT91_ST_WDRST); } +int rm9200_type; +EXPORT_SYMBOL(rm9200_type); + +void __init at91rm9200_set_type(int type) +{ + rm9200_type = type; +} /* -------------------------------------------------------------------- * AT91RM9200 processor initialization * -------------------------------------------------------------------- */ -void __init at91rm9200_initialize(unsigned long main_clock, unsigned short banks) +void __init at91rm9200_map_io(void) { /* Map peripherals */ iotable_init(at91rm9200_io_desc, ARRAY_SIZE(at91rm9200_io_desc)); +} +void __init at91rm9200_initialize(unsigned long main_clock) +{ at91_arch_reset = at91rm9200_reset; at91_extern_irq = (1 << AT91RM9200_ID_IRQ0) | (1 << AT91RM9200_ID_IRQ1) | (1 << AT91RM9200_ID_IRQ2) | (1 << AT91RM9200_ID_IRQ3) @@ -288,7 +336,8 @@ void __init at91rm9200_initialize(unsigned long main_clock, unsigned short banks at91rm9200_register_clocks(); /* Initialize GPIO subsystem */ - at91_gpio_init(at91rm9200_gpio, banks); + at91_gpio_init(at91rm9200_gpio, + cpu_is_at91rm9200_bga() ? AT91RM9200_BGA : AT91RM9200_PQFP); } diff --git a/arch/arm/mach-at91/at91rm9200_devices.c b/arch/arm/mach-at91/at91rm9200_devices.c index 7b539228e0ef..a0ba475be04c 100644 --- a/arch/arm/mach-at91/at91rm9200_devices.c +++ b/arch/arm/mach-at91/at91rm9200_devices.c @@ -644,15 +644,7 @@ static struct platform_device at91rm9200_tcb1_device = { static void __init at91_add_device_tc(void) { - /* this chip has a separate clock and irq for each TC channel */ - at91_clock_associate("tc0_clk", &at91rm9200_tcb0_device.dev, "t0_clk"); - at91_clock_associate("tc1_clk", &at91rm9200_tcb0_device.dev, "t1_clk"); - at91_clock_associate("tc2_clk", &at91rm9200_tcb0_device.dev, "t2_clk"); platform_device_register(&at91rm9200_tcb0_device); - - at91_clock_associate("tc3_clk", &at91rm9200_tcb1_device.dev, "t0_clk"); - at91_clock_associate("tc4_clk", &at91rm9200_tcb1_device.dev, "t1_clk"); - at91_clock_associate("tc5_clk", &at91rm9200_tcb1_device.dev, "t2_clk"); platform_device_register(&at91rm9200_tcb1_device); } #else @@ -849,17 +841,14 @@ void __init at91_add_device_ssc(unsigned id, unsigned pins) case AT91RM9200_ID_SSC0: pdev = &at91rm9200_ssc0_device; configure_ssc0_pins(pins); - at91_clock_associate("ssc0_clk", &pdev->dev, "ssc"); break; case AT91RM9200_ID_SSC1: pdev = &at91rm9200_ssc1_device; configure_ssc1_pins(pins); - at91_clock_associate("ssc1_clk", &pdev->dev, "ssc"); break; case AT91RM9200_ID_SSC2: pdev = &at91rm9200_ssc2_device; configure_ssc2_pins(pins); - at91_clock_associate("ssc2_clk", &pdev->dev, "ssc"); break; default: return; @@ -1109,37 +1098,34 @@ struct platform_device *atmel_default_console_device; /* the serial console devi void __init at91_register_uart(unsigned id, unsigned portnr, unsigned pins) { struct platform_device *pdev; + struct atmel_uart_data *pdata; switch (id) { case 0: /* DBGU */ pdev = &at91rm9200_dbgu_device; configure_dbgu_pins(); - at91_clock_associate("mck", &pdev->dev, "usart"); break; case AT91RM9200_ID_US0: pdev = &at91rm9200_uart0_device; configure_usart0_pins(pins); - at91_clock_associate("usart0_clk", &pdev->dev, "usart"); break; case AT91RM9200_ID_US1: pdev = &at91rm9200_uart1_device; configure_usart1_pins(pins); - at91_clock_associate("usart1_clk", &pdev->dev, "usart"); break; case AT91RM9200_ID_US2: pdev = &at91rm9200_uart2_device; configure_usart2_pins(pins); - at91_clock_associate("usart2_clk", &pdev->dev, "usart"); break; case AT91RM9200_ID_US3: pdev = &at91rm9200_uart3_device; configure_usart3_pins(pins); - at91_clock_associate("usart3_clk", &pdev->dev, "usart"); break; default: return; } - pdev->id = portnr; /* update to mapped ID */ + pdata = pdev->dev.platform_data; + pdata->num = portnr; /* update to mapped ID */ if (portnr < ATMEL_MAX_UART) at91_uarts[portnr] = pdev; @@ -1147,8 +1133,10 @@ void __init at91_register_uart(unsigned id, unsigned portnr, unsigned pins) void __init at91_set_serial_console(unsigned portnr) { - if (portnr < ATMEL_MAX_UART) + if (portnr < ATMEL_MAX_UART) { atmel_default_console_device = at91_uarts[portnr]; + at91rm9200_set_console_clock(portnr); + } } void __init at91_add_device_serial(void) diff --git a/arch/arm/mach-at91/at91sam9260.c b/arch/arm/mach-at91/at91sam9260.c index 195208b30024..7d606b04d313 100644 --- a/arch/arm/mach-at91/at91sam9260.c +++ b/arch/arm/mach-at91/at91sam9260.c @@ -231,6 +231,28 @@ static struct clk *periph_clocks[] __initdata = { // irq0 .. irq2 }; +static struct clk_lookup periph_clocks_lookups[] = { + CLKDEV_CON_DEV_ID("spi_clk", "atmel_spi.0", &spi0_clk), + CLKDEV_CON_DEV_ID("spi_clk", "atmel_spi.1", &spi1_clk), + CLKDEV_CON_DEV_ID("t0_clk", "atmel_tcb.0", &tc0_clk), + CLKDEV_CON_DEV_ID("t1_clk", "atmel_tcb.0", &tc1_clk), + CLKDEV_CON_DEV_ID("t2_clk", "atmel_tcb.0", &tc2_clk), + CLKDEV_CON_DEV_ID("t3_clk", "atmel_tcb.1", &tc3_clk), + CLKDEV_CON_DEV_ID("t4_clk", "atmel_tcb.1", &tc4_clk), + CLKDEV_CON_DEV_ID("t5_clk", "atmel_tcb.1", &tc5_clk), + CLKDEV_CON_DEV_ID("pclk", "ssc.0", &ssc_clk), +}; + +static struct clk_lookup usart_clocks_lookups[] = { + CLKDEV_CON_DEV_ID("usart", "atmel_usart.0", &mck), + CLKDEV_CON_DEV_ID("usart", "atmel_usart.1", &usart0_clk), + CLKDEV_CON_DEV_ID("usart", "atmel_usart.2", &usart1_clk), + CLKDEV_CON_DEV_ID("usart", "atmel_usart.3", &usart2_clk), + CLKDEV_CON_DEV_ID("usart", "atmel_usart.4", &usart3_clk), + CLKDEV_CON_DEV_ID("usart", "atmel_usart.5", &usart4_clk), + CLKDEV_CON_DEV_ID("usart", "atmel_usart.6", &usart5_clk), +}; + /* * The two programmable clocks. * You must configure pin multiplexing to bring these signals out. @@ -255,10 +277,27 @@ static void __init at91sam9260_register_clocks(void) for (i = 0; i < ARRAY_SIZE(periph_clocks); i++) clk_register(periph_clocks[i]); + clkdev_add_table(periph_clocks_lookups, + ARRAY_SIZE(periph_clocks_lookups)); + clkdev_add_table(usart_clocks_lookups, + ARRAY_SIZE(usart_clocks_lookups)); + clk_register(&pck0); clk_register(&pck1); } +static struct clk_lookup console_clock_lookup; + +void __init at91sam9260_set_console_clock(int id) +{ + if (id >= ARRAY_SIZE(usart_clocks_lookups)) + return; + + console_clock_lookup.con_id = "usart"; + console_clock_lookup.clk = usart_clocks_lookups[id].clk; + clkdev_add(&console_clock_lookup); +} + /* -------------------------------------------------------------------- * GPIO * -------------------------------------------------------------------- */ @@ -289,7 +328,7 @@ static void at91sam9260_poweroff(void) * AT91SAM9260 processor initialization * -------------------------------------------------------------------- */ -static void __init at91sam9xe_initialize(void) +static void __init at91sam9xe_map_io(void) { unsigned long cidr, sram_size; @@ -310,18 +349,21 @@ static void __init at91sam9xe_initialize(void) iotable_init(at91sam9xe_sram_desc, ARRAY_SIZE(at91sam9xe_sram_desc)); } -void __init at91sam9260_initialize(unsigned long main_clock) +void __init at91sam9260_map_io(void) { /* Map peripherals */ iotable_init(at91sam9260_io_desc, ARRAY_SIZE(at91sam9260_io_desc)); if (cpu_is_at91sam9xe()) - at91sam9xe_initialize(); + at91sam9xe_map_io(); else if (cpu_is_at91sam9g20()) iotable_init(at91sam9g20_sram_desc, ARRAY_SIZE(at91sam9g20_sram_desc)); else iotable_init(at91sam9260_sram_desc, ARRAY_SIZE(at91sam9260_sram_desc)); +} +void __init at91sam9260_initialize(unsigned long main_clock) +{ at91_arch_reset = at91sam9_alt_reset; pm_power_off = at91sam9260_poweroff; at91_extern_irq = (1 << AT91SAM9260_ID_IRQ0) | (1 << AT91SAM9260_ID_IRQ1) diff --git a/arch/arm/mach-at91/at91sam9260_devices.c b/arch/arm/mach-at91/at91sam9260_devices.c index 07eb7b07e442..1fdeb9058a76 100644 --- a/arch/arm/mach-at91/at91sam9260_devices.c +++ b/arch/arm/mach-at91/at91sam9260_devices.c @@ -609,7 +609,6 @@ void __init at91_add_device_spi(struct spi_board_info *devices, int nr_devices) at91_set_A_periph(AT91_PIN_PA1, 0); /* SPI0_MOSI */ at91_set_A_periph(AT91_PIN_PA2, 0); /* SPI1_SPCK */ - at91_clock_associate("spi0_clk", &at91sam9260_spi0_device.dev, "spi_clk"); platform_device_register(&at91sam9260_spi0_device); } if (enable_spi1) { @@ -617,7 +616,6 @@ void __init at91_add_device_spi(struct spi_board_info *devices, int nr_devices) at91_set_A_periph(AT91_PIN_PB1, 0); /* SPI1_MOSI */ at91_set_A_periph(AT91_PIN_PB2, 0); /* SPI1_SPCK */ - at91_clock_associate("spi1_clk", &at91sam9260_spi1_device.dev, "spi_clk"); platform_device_register(&at91sam9260_spi1_device); } } @@ -694,15 +692,7 @@ static struct platform_device at91sam9260_tcb1_device = { static void __init at91_add_device_tc(void) { - /* this chip has a separate clock and irq for each TC channel */ - at91_clock_associate("tc0_clk", &at91sam9260_tcb0_device.dev, "t0_clk"); - at91_clock_associate("tc1_clk", &at91sam9260_tcb0_device.dev, "t1_clk"); - at91_clock_associate("tc2_clk", &at91sam9260_tcb0_device.dev, "t2_clk"); platform_device_register(&at91sam9260_tcb0_device); - - at91_clock_associate("tc3_clk", &at91sam9260_tcb1_device.dev, "t0_clk"); - at91_clock_associate("tc4_clk", &at91sam9260_tcb1_device.dev, "t1_clk"); - at91_clock_associate("tc5_clk", &at91sam9260_tcb1_device.dev, "t2_clk"); platform_device_register(&at91sam9260_tcb1_device); } #else @@ -820,7 +810,6 @@ void __init at91_add_device_ssc(unsigned id, unsigned pins) case AT91SAM9260_ID_SSC: pdev = &at91sam9260_ssc_device; configure_ssc_pins(pins); - at91_clock_associate("ssc_clk", &pdev->dev, "pclk"); break; default: return; @@ -1139,47 +1128,42 @@ struct platform_device *atmel_default_console_device; /* the serial console devi void __init at91_register_uart(unsigned id, unsigned portnr, unsigned pins) { struct platform_device *pdev; + struct atmel_uart_data *pdata; switch (id) { case 0: /* DBGU */ pdev = &at91sam9260_dbgu_device; configure_dbgu_pins(); - at91_clock_associate("mck", &pdev->dev, "usart"); break; case AT91SAM9260_ID_US0: pdev = &at91sam9260_uart0_device; configure_usart0_pins(pins); - at91_clock_associate("usart0_clk", &pdev->dev, "usart"); break; case AT91SAM9260_ID_US1: pdev = &at91sam9260_uart1_device; configure_usart1_pins(pins); - at91_clock_associate("usart1_clk", &pdev->dev, "usart"); break; case AT91SAM9260_ID_US2: pdev = &at91sam9260_uart2_device; configure_usart2_pins(pins); - at91_clock_associate("usart2_clk", &pdev->dev, "usart"); break; case AT91SAM9260_ID_US3: pdev = &at91sam9260_uart3_device; configure_usart3_pins(pins); - at91_clock_associate("usart3_clk", &pdev->dev, "usart"); break; case AT91SAM9260_ID_US4: pdev = &at91sam9260_uart4_device; configure_usart4_pins(); - at91_clock_associate("usart4_clk", &pdev->dev, "usart"); break; case AT91SAM9260_ID_US5: pdev = &at91sam9260_uart5_device; configure_usart5_pins(); - at91_clock_associate("usart5_clk", &pdev->dev, "usart"); break; default: return; } - pdev->id = portnr; /* update to mapped ID */ + pdata = pdev->dev.platform_data; + pdata->num = portnr; /* update to mapped ID */ if (portnr < ATMEL_MAX_UART) at91_uarts[portnr] = pdev; @@ -1187,8 +1171,10 @@ void __init at91_register_uart(unsigned id, unsigned portnr, unsigned pins) void __init at91_set_serial_console(unsigned portnr) { - if (portnr < ATMEL_MAX_UART) + if (portnr < ATMEL_MAX_UART) { atmel_default_console_device = at91_uarts[portnr]; + at91sam9260_set_console_clock(portnr); + } } void __init at91_add_device_serial(void) diff --git a/arch/arm/mach-at91/at91sam9261.c b/arch/arm/mach-at91/at91sam9261.c index fcad88668504..c1483168c97a 100644 --- a/arch/arm/mach-at91/at91sam9261.c +++ b/arch/arm/mach-at91/at91sam9261.c @@ -178,6 +178,24 @@ static struct clk *periph_clocks[] __initdata = { // irq0 .. irq2 }; +static struct clk_lookup periph_clocks_lookups[] = { + CLKDEV_CON_DEV_ID("spi_clk", "atmel_spi.0", &spi0_clk), + CLKDEV_CON_DEV_ID("spi_clk", "atmel_spi.1", &spi1_clk), + CLKDEV_CON_DEV_ID("t0_clk", "atmel_tcb.0", &tc0_clk), + CLKDEV_CON_DEV_ID("t1_clk", "atmel_tcb.0", &tc1_clk), + CLKDEV_CON_DEV_ID("t2_clk", "atmel_tcb.0", &tc1_clk), + CLKDEV_CON_DEV_ID("pclk", "ssc.0", &ssc0_clk), + CLKDEV_CON_DEV_ID("pclk", "ssc.1", &ssc1_clk), + CLKDEV_CON_DEV_ID("pclk", "ssc.2", &ssc2_clk), +}; + +static struct clk_lookup usart_clocks_lookups[] = { + CLKDEV_CON_DEV_ID("usart", "atmel_usart.0", &mck), + CLKDEV_CON_DEV_ID("usart", "atmel_usart.1", &usart0_clk), + CLKDEV_CON_DEV_ID("usart", "atmel_usart.2", &usart1_clk), + CLKDEV_CON_DEV_ID("usart", "atmel_usart.3", &usart2_clk), +}; + /* * The four programmable clocks. * You must configure pin multiplexing to bring these signals out. @@ -228,6 +246,11 @@ static void __init at91sam9261_register_clocks(void) for (i = 0; i < ARRAY_SIZE(periph_clocks); i++) clk_register(periph_clocks[i]); + clkdev_add_table(periph_clocks_lookups, + ARRAY_SIZE(periph_clocks_lookups)); + clkdev_add_table(usart_clocks_lookups, + ARRAY_SIZE(usart_clocks_lookups)); + clk_register(&pck0); clk_register(&pck1); clk_register(&pck2); @@ -237,6 +260,18 @@ static void __init at91sam9261_register_clocks(void) clk_register(&hck1); } +static struct clk_lookup console_clock_lookup; + +void __init at91sam9261_set_console_clock(int id) +{ + if (id >= ARRAY_SIZE(usart_clocks_lookups)) + return; + + console_clock_lookup.con_id = "usart"; + console_clock_lookup.clk = usart_clocks_lookups[id].clk; + clkdev_add(&console_clock_lookup); +} + /* -------------------------------------------------------------------- * GPIO * -------------------------------------------------------------------- */ @@ -267,7 +302,7 @@ static void at91sam9261_poweroff(void) * AT91SAM9261 processor initialization * -------------------------------------------------------------------- */ -void __init at91sam9261_initialize(unsigned long main_clock) +void __init at91sam9261_map_io(void) { /* Map peripherals */ iotable_init(at91sam9261_io_desc, ARRAY_SIZE(at91sam9261_io_desc)); @@ -276,8 +311,10 @@ void __init at91sam9261_initialize(unsigned long main_clock) iotable_init(at91sam9g10_sram_desc, ARRAY_SIZE(at91sam9g10_sram_desc)); else iotable_init(at91sam9261_sram_desc, ARRAY_SIZE(at91sam9261_sram_desc)); +} - +void __init at91sam9261_initialize(unsigned long main_clock) +{ at91_arch_reset = at91sam9_alt_reset; pm_power_off = at91sam9261_poweroff; at91_extern_irq = (1 << AT91SAM9261_ID_IRQ0) | (1 << AT91SAM9261_ID_IRQ1) diff --git a/arch/arm/mach-at91/at91sam9261_devices.c b/arch/arm/mach-at91/at91sam9261_devices.c index 59fc48311fb0..3eb4538fceeb 100644 --- a/arch/arm/mach-at91/at91sam9261_devices.c +++ b/arch/arm/mach-at91/at91sam9261_devices.c @@ -426,7 +426,6 @@ void __init at91_add_device_spi(struct spi_board_info *devices, int nr_devices) at91_set_A_periph(AT91_PIN_PA1, 0); /* SPI0_MOSI */ at91_set_A_periph(AT91_PIN_PA2, 0); /* SPI0_SPCK */ - at91_clock_associate("spi0_clk", &at91sam9261_spi0_device.dev, "spi_clk"); platform_device_register(&at91sam9261_spi0_device); } if (enable_spi1) { @@ -434,7 +433,6 @@ void __init at91_add_device_spi(struct spi_board_info *devices, int nr_devices) at91_set_A_periph(AT91_PIN_PB31, 0); /* SPI1_MOSI */ at91_set_A_periph(AT91_PIN_PB29, 0); /* SPI1_SPCK */ - at91_clock_associate("spi1_clk", &at91sam9261_spi1_device.dev, "spi_clk"); platform_device_register(&at91sam9261_spi1_device); } } @@ -581,10 +579,6 @@ static struct platform_device at91sam9261_tcb_device = { static void __init at91_add_device_tc(void) { - /* this chip has a separate clock and irq for each TC channel */ - at91_clock_associate("tc0_clk", &at91sam9261_tcb_device.dev, "t0_clk"); - at91_clock_associate("tc1_clk", &at91sam9261_tcb_device.dev, "t1_clk"); - at91_clock_associate("tc2_clk", &at91sam9261_tcb_device.dev, "t2_clk"); platform_device_register(&at91sam9261_tcb_device); } #else @@ -786,17 +780,14 @@ void __init at91_add_device_ssc(unsigned id, unsigned pins) case AT91SAM9261_ID_SSC0: pdev = &at91sam9261_ssc0_device; configure_ssc0_pins(pins); - at91_clock_associate("ssc0_clk", &pdev->dev, "pclk"); break; case AT91SAM9261_ID_SSC1: pdev = &at91sam9261_ssc1_device; configure_ssc1_pins(pins); - at91_clock_associate("ssc1_clk", &pdev->dev, "pclk"); break; case AT91SAM9261_ID_SSC2: pdev = &at91sam9261_ssc2_device; configure_ssc2_pins(pins); - at91_clock_associate("ssc2_clk", &pdev->dev, "pclk"); break; default: return; @@ -989,32 +980,30 @@ struct platform_device *atmel_default_console_device; /* the serial console devi void __init at91_register_uart(unsigned id, unsigned portnr, unsigned pins) { struct platform_device *pdev; + struct atmel_uart_data *pdata; switch (id) { case 0: /* DBGU */ pdev = &at91sam9261_dbgu_device; configure_dbgu_pins(); - at91_clock_associate("mck", &pdev->dev, "usart"); break; case AT91SAM9261_ID_US0: pdev = &at91sam9261_uart0_device; configure_usart0_pins(pins); - at91_clock_associate("usart0_clk", &pdev->dev, "usart"); break; case AT91SAM9261_ID_US1: pdev = &at91sam9261_uart1_device; configure_usart1_pins(pins); - at91_clock_associate("usart1_clk", &pdev->dev, "usart"); break; case AT91SAM9261_ID_US2: pdev = &at91sam9261_uart2_device; configure_usart2_pins(pins); - at91_clock_associate("usart2_clk", &pdev->dev, "usart"); break; default: return; } - pdev->id = portnr; /* update to mapped ID */ + pdata = pdev->dev.platform_data; + pdata->num = portnr; /* update to mapped ID */ if (portnr < ATMEL_MAX_UART) at91_uarts[portnr] = pdev; @@ -1022,8 +1011,10 @@ void __init at91_register_uart(unsigned id, unsigned portnr, unsigned pins) void __init at91_set_serial_console(unsigned portnr) { - if (portnr < ATMEL_MAX_UART) + if (portnr < ATMEL_MAX_UART) { atmel_default_console_device = at91_uarts[portnr]; + at91sam9261_set_console_clock(portnr); + } } void __init at91_add_device_serial(void) diff --git a/arch/arm/mach-at91/at91sam9263.c b/arch/arm/mach-at91/at91sam9263.c index 249f900954d8..dc28477d14ff 100644 --- a/arch/arm/mach-at91/at91sam9263.c +++ b/arch/arm/mach-at91/at91sam9263.c @@ -199,6 +199,23 @@ static struct clk *periph_clocks[] __initdata = { // irq0 .. irq1 }; +static struct clk_lookup periph_clocks_lookups[] = { + CLKDEV_CON_DEV_ID("pclk", "ssc.0", &ssc0_clk), + CLKDEV_CON_DEV_ID("pclk", "ssc.1", &ssc1_clk), + CLKDEV_CON_DEV_ID("mci_clk", "at91_mci.0", &mmc0_clk), + CLKDEV_CON_DEV_ID("mci_clk", "at91_mci.1", &mmc1_clk), + CLKDEV_CON_DEV_ID("spi_clk", "atmel_spi.0", &spi0_clk), + CLKDEV_CON_DEV_ID("spi_clk", "atmel_spi.1", &spi1_clk), + CLKDEV_CON_DEV_ID("t0_clk", "atmel_tcb.0", &tcb_clk), +}; + +static struct clk_lookup usart_clocks_lookups[] = { + CLKDEV_CON_DEV_ID("usart", "atmel_usart.0", &mck), + CLKDEV_CON_DEV_ID("usart", "atmel_usart.1", &usart0_clk), + CLKDEV_CON_DEV_ID("usart", "atmel_usart.2", &usart1_clk), + CLKDEV_CON_DEV_ID("usart", "atmel_usart.3", &usart2_clk), +}; + /* * The four programmable clocks. * You must configure pin multiplexing to bring these signals out. @@ -235,12 +252,29 @@ static void __init at91sam9263_register_clocks(void) for (i = 0; i < ARRAY_SIZE(periph_clocks); i++) clk_register(periph_clocks[i]); + clkdev_add_table(periph_clocks_lookups, + ARRAY_SIZE(periph_clocks_lookups)); + clkdev_add_table(usart_clocks_lookups, + ARRAY_SIZE(usart_clocks_lookups)); + clk_register(&pck0); clk_register(&pck1); clk_register(&pck2); clk_register(&pck3); } +static struct clk_lookup console_clock_lookup; + +void __init at91sam9263_set_console_clock(int id) +{ + if (id >= ARRAY_SIZE(usart_clocks_lookups)) + return; + + console_clock_lookup.con_id = "usart"; + console_clock_lookup.clk = usart_clocks_lookups[id].clk; + clkdev_add(&console_clock_lookup); +} + /* -------------------------------------------------------------------- * GPIO * -------------------------------------------------------------------- */ @@ -279,11 +313,14 @@ static void at91sam9263_poweroff(void) * AT91SAM9263 processor initialization * -------------------------------------------------------------------- */ -void __init at91sam9263_initialize(unsigned long main_clock) +void __init at91sam9263_map_io(void) { /* Map peripherals */ iotable_init(at91sam9263_io_desc, ARRAY_SIZE(at91sam9263_io_desc)); +} +void __init at91sam9263_initialize(unsigned long main_clock) +{ at91_arch_reset = at91sam9_alt_reset; pm_power_off = at91sam9263_poweroff; at91_extern_irq = (1 << AT91SAM9263_ID_IRQ0) | (1 << AT91SAM9263_ID_IRQ1); diff --git a/arch/arm/mach-at91/at91sam9263_devices.c b/arch/arm/mach-at91/at91sam9263_devices.c index fb5c23af1017..ffe081b77ed0 100644 --- a/arch/arm/mach-at91/at91sam9263_devices.c +++ b/arch/arm/mach-at91/at91sam9263_devices.c @@ -308,7 +308,6 @@ void __init at91_add_device_mmc(short mmc_id, struct at91_mmc_data *data) } mmc0_data = *data; - at91_clock_associate("mci0_clk", &at91sam9263_mmc0_device.dev, "mci_clk"); platform_device_register(&at91sam9263_mmc0_device); } else { /* MCI1 */ /* CLK */ @@ -339,7 +338,6 @@ void __init at91_add_device_mmc(short mmc_id, struct at91_mmc_data *data) } mmc1_data = *data; - at91_clock_associate("mci1_clk", &at91sam9263_mmc1_device.dev, "mci_clk"); platform_device_register(&at91sam9263_mmc1_device); } } @@ -686,7 +684,6 @@ void __init at91_add_device_spi(struct spi_board_info *devices, int nr_devices) at91_set_B_periph(AT91_PIN_PA1, 0); /* SPI0_MOSI */ at91_set_B_periph(AT91_PIN_PA2, 0); /* SPI0_SPCK */ - at91_clock_associate("spi0_clk", &at91sam9263_spi0_device.dev, "spi_clk"); platform_device_register(&at91sam9263_spi0_device); } if (enable_spi1) { @@ -694,7 +691,6 @@ void __init at91_add_device_spi(struct spi_board_info *devices, int nr_devices) at91_set_A_periph(AT91_PIN_PB13, 0); /* SPI1_MOSI */ at91_set_A_periph(AT91_PIN_PB14, 0); /* SPI1_SPCK */ - at91_clock_associate("spi1_clk", &at91sam9263_spi1_device.dev, "spi_clk"); platform_device_register(&at91sam9263_spi1_device); } } @@ -941,8 +937,6 @@ static struct platform_device at91sam9263_tcb_device = { static void __init at91_add_device_tc(void) { - /* this chip has one clock and irq for all three TC channels */ - at91_clock_associate("tcb_clk", &at91sam9263_tcb_device.dev, "t0_clk"); platform_device_register(&at91sam9263_tcb_device); } #else @@ -1171,12 +1165,10 @@ void __init at91_add_device_ssc(unsigned id, unsigned pins) case AT91SAM9263_ID_SSC0: pdev = &at91sam9263_ssc0_device; configure_ssc0_pins(pins); - at91_clock_associate("ssc0_clk", &pdev->dev, "pclk"); break; case AT91SAM9263_ID_SSC1: pdev = &at91sam9263_ssc1_device; configure_ssc1_pins(pins); - at91_clock_associate("ssc1_clk", &pdev->dev, "pclk"); break; default: return; @@ -1370,32 +1362,30 @@ struct platform_device *atmel_default_console_device; /* the serial console devi void __init at91_register_uart(unsigned id, unsigned portnr, unsigned pins) { struct platform_device *pdev; + struct atmel_uart_data *pdata; switch (id) { case 0: /* DBGU */ pdev = &at91sam9263_dbgu_device; configure_dbgu_pins(); - at91_clock_associate("mck", &pdev->dev, "usart"); break; case AT91SAM9263_ID_US0: pdev = &at91sam9263_uart0_device; configure_usart0_pins(pins); - at91_clock_associate("usart0_clk", &pdev->dev, "usart"); break; case AT91SAM9263_ID_US1: pdev = &at91sam9263_uart1_device; configure_usart1_pins(pins); - at91_clock_associate("usart1_clk", &pdev->dev, "usart"); break; case AT91SAM9263_ID_US2: pdev = &at91sam9263_uart2_device; configure_usart2_pins(pins); - at91_clock_associate("usart2_clk", &pdev->dev, "usart"); break; default: return; } - pdev->id = portnr; /* update to mapped ID */ + pdata = pdev->dev.platform_data; + pdata->num = portnr; /* update to mapped ID */ if (portnr < ATMEL_MAX_UART) at91_uarts[portnr] = pdev; @@ -1403,8 +1393,10 @@ void __init at91_register_uart(unsigned id, unsigned portnr, unsigned pins) void __init at91_set_serial_console(unsigned portnr) { - if (portnr < ATMEL_MAX_UART) + if (portnr < ATMEL_MAX_UART) { atmel_default_console_device = at91_uarts[portnr]; + at91sam9263_set_console_clock(portnr); + } } void __init at91_add_device_serial(void) diff --git a/arch/arm/mach-at91/at91sam9g45.c b/arch/arm/mach-at91/at91sam9g45.c index c67b47f1c0fd..2bb6ff9af1c7 100644 --- a/arch/arm/mach-at91/at91sam9g45.c +++ b/arch/arm/mach-at91/at91sam9g45.c @@ -184,22 +184,6 @@ static struct clk vdec_clk = { .type = CLK_TYPE_PERIPHERAL, }; -/* One additional fake clock for ohci */ -static struct clk ohci_clk = { - .name = "ohci_clk", - .pmc_mask = 0, - .type = CLK_TYPE_PERIPHERAL, - .parent = &uhphs_clk, -}; - -/* One additional fake clock for second TC block */ -static struct clk tcb1_clk = { - .name = "tcb1_clk", - .pmc_mask = 0, - .type = CLK_TYPE_PERIPHERAL, - .parent = &tcb0_clk, -}; - static struct clk *periph_clocks[] __initdata = { &pioA_clk, &pioB_clk, @@ -228,8 +212,30 @@ static struct clk *periph_clocks[] __initdata = { &udphs_clk, &mmc1_clk, // irq0 - &ohci_clk, - &tcb1_clk, +}; + +static struct clk_lookup periph_clocks_lookups[] = { + /* One additional fake clock for ohci */ + CLKDEV_CON_ID("ohci_clk", &uhphs_clk), + CLKDEV_CON_DEV_ID("ehci_clk", "atmel-ehci.0", &uhphs_clk), + CLKDEV_CON_DEV_ID("hclk", "atmel_usba_udc.0", &utmi_clk), + CLKDEV_CON_DEV_ID("pclk", "atmel_usba_udc.0", &udphs_clk), + CLKDEV_CON_DEV_ID("mci_clk", "at91_mci.0", &mmc0_clk), + CLKDEV_CON_DEV_ID("mci_clk", "at91_mci.1", &mmc1_clk), + CLKDEV_CON_DEV_ID("spi_clk", "atmel_spi.0", &spi0_clk), + CLKDEV_CON_DEV_ID("spi_clk", "atmel_spi.1", &spi1_clk), + CLKDEV_CON_DEV_ID("t0_clk", "atmel_tcb.0", &tcb0_clk), + CLKDEV_CON_DEV_ID("t0_clk", "atmel_tcb.1", &tcb0_clk), + CLKDEV_CON_DEV_ID("pclk", "ssc.0", &ssc0_clk), + CLKDEV_CON_DEV_ID("pclk", "ssc.1", &ssc1_clk), +}; + +static struct clk_lookup usart_clocks_lookups[] = { + CLKDEV_CON_DEV_ID("usart", "atmel_usart.0", &mck), + CLKDEV_CON_DEV_ID("usart", "atmel_usart.1", &usart0_clk), + CLKDEV_CON_DEV_ID("usart", "atmel_usart.2", &usart1_clk), + CLKDEV_CON_DEV_ID("usart", "atmel_usart.3", &usart2_clk), + CLKDEV_CON_DEV_ID("usart", "atmel_usart.4", &usart3_clk), }; /* @@ -256,6 +262,11 @@ static void __init at91sam9g45_register_clocks(void) for (i = 0; i < ARRAY_SIZE(periph_clocks); i++) clk_register(periph_clocks[i]); + clkdev_add_table(periph_clocks_lookups, + ARRAY_SIZE(periph_clocks_lookups)); + clkdev_add_table(usart_clocks_lookups, + ARRAY_SIZE(usart_clocks_lookups)); + if (cpu_is_at91sam9m10() || cpu_is_at91sam9m11()) clk_register(&vdec_clk); @@ -263,6 +274,18 @@ static void __init at91sam9g45_register_clocks(void) clk_register(&pck1); } +static struct clk_lookup console_clock_lookup; + +void __init at91sam9g45_set_console_clock(int id) +{ + if (id >= ARRAY_SIZE(usart_clocks_lookups)) + return; + + console_clock_lookup.con_id = "usart"; + console_clock_lookup.clk = usart_clocks_lookups[id].clk; + clkdev_add(&console_clock_lookup); +} + /* -------------------------------------------------------------------- * GPIO * -------------------------------------------------------------------- */ @@ -306,11 +329,14 @@ static void at91sam9g45_poweroff(void) * AT91SAM9G45 processor initialization * -------------------------------------------------------------------- */ -void __init at91sam9g45_initialize(unsigned long main_clock) +void __init at91sam9g45_map_io(void) { /* Map peripherals */ iotable_init(at91sam9g45_io_desc, ARRAY_SIZE(at91sam9g45_io_desc)); +} +void __init at91sam9g45_initialize(unsigned long main_clock) +{ at91_arch_reset = at91sam9g45_reset; pm_power_off = at91sam9g45_poweroff; at91_extern_irq = (1 << AT91SAM9G45_ID_IRQ0); diff --git a/arch/arm/mach-at91/at91sam9g45_devices.c b/arch/arm/mach-at91/at91sam9g45_devices.c index 1e8f275c17f6..05674865bc21 100644 --- a/arch/arm/mach-at91/at91sam9g45_devices.c +++ b/arch/arm/mach-at91/at91sam9g45_devices.c @@ -180,7 +180,6 @@ void __init at91_add_device_usbh_ehci(struct at91_usbh_data *data) } usbh_ehci_data = *data; - at91_clock_associate("uhphs_clk", &at91_usbh_ehci_device.dev, "ehci_clk"); platform_device_register(&at91_usbh_ehci_device); } #else @@ -256,7 +255,7 @@ void __init at91_add_device_usba(struct usba_platform_data *data) { usba_udc_data.pdata.vbus_pin = -EINVAL; usba_udc_data.pdata.num_ep = ARRAY_SIZE(usba_udc_ep); - memcpy(usba_udc_data.ep, usba_udc_ep, sizeof(usba_udc_ep));; + memcpy(usba_udc_data.ep, usba_udc_ep, sizeof(usba_udc_ep)); if (data && data->vbus_pin > 0) { at91_set_gpio_input(data->vbus_pin, 0); @@ -266,10 +265,6 @@ void __init at91_add_device_usba(struct usba_platform_data *data) /* Pullup pin is handled internally by USB device peripheral */ - /* Clocks */ - at91_clock_associate("utmi_clk", &at91_usba_udc_device.dev, "hclk"); - at91_clock_associate("udphs_clk", &at91_usba_udc_device.dev, "pclk"); - platform_device_register(&at91_usba_udc_device); } #else @@ -478,7 +473,6 @@ void __init at91_add_device_mci(short mmc_id, struct mci_platform_data *data) } mmc0_data = *data; - at91_clock_associate("mci0_clk", &at91sam9g45_mmc0_device.dev, "mci_clk"); platform_device_register(&at91sam9g45_mmc0_device); } else { /* MCI1 */ @@ -504,7 +498,6 @@ void __init at91_add_device_mci(short mmc_id, struct mci_platform_data *data) } mmc1_data = *data; - at91_clock_associate("mci1_clk", &at91sam9g45_mmc1_device.dev, "mci_clk"); platform_device_register(&at91sam9g45_mmc1_device); } @@ -801,7 +794,6 @@ void __init at91_add_device_spi(struct spi_board_info *devices, int nr_devices) at91_set_A_periph(AT91_PIN_PB1, 0); /* SPI0_MOSI */ at91_set_A_periph(AT91_PIN_PB2, 0); /* SPI0_SPCK */ - at91_clock_associate("spi0_clk", &at91sam9g45_spi0_device.dev, "spi_clk"); platform_device_register(&at91sam9g45_spi0_device); } if (enable_spi1) { @@ -809,7 +801,6 @@ void __init at91_add_device_spi(struct spi_board_info *devices, int nr_devices) at91_set_A_periph(AT91_PIN_PB15, 0); /* SPI1_MOSI */ at91_set_A_periph(AT91_PIN_PB16, 0); /* SPI1_SPCK */ - at91_clock_associate("spi1_clk", &at91sam9g45_spi1_device.dev, "spi_clk"); platform_device_register(&at91sam9g45_spi1_device); } } @@ -999,10 +990,7 @@ static struct platform_device at91sam9g45_tcb1_device = { static void __init at91_add_device_tc(void) { - /* this chip has one clock and irq for all six TC channels */ - at91_clock_associate("tcb0_clk", &at91sam9g45_tcb0_device.dev, "t0_clk"); platform_device_register(&at91sam9g45_tcb0_device); - at91_clock_associate("tcb1_clk", &at91sam9g45_tcb1_device.dev, "t0_clk"); platform_device_register(&at91sam9g45_tcb1_device); } #else @@ -1286,12 +1274,10 @@ void __init at91_add_device_ssc(unsigned id, unsigned pins) case AT91SAM9G45_ID_SSC0: pdev = &at91sam9g45_ssc0_device; configure_ssc0_pins(pins); - at91_clock_associate("ssc0_clk", &pdev->dev, "pclk"); break; case AT91SAM9G45_ID_SSC1: pdev = &at91sam9g45_ssc1_device; configure_ssc1_pins(pins); - at91_clock_associate("ssc1_clk", &pdev->dev, "pclk"); break; default: return; @@ -1527,37 +1513,34 @@ struct platform_device *atmel_default_console_device; /* the serial console devi void __init at91_register_uart(unsigned id, unsigned portnr, unsigned pins) { struct platform_device *pdev; + struct atmel_uart_data *pdata; switch (id) { case 0: /* DBGU */ pdev = &at91sam9g45_dbgu_device; configure_dbgu_pins(); - at91_clock_associate("mck", &pdev->dev, "usart"); break; case AT91SAM9G45_ID_US0: pdev = &at91sam9g45_uart0_device; configure_usart0_pins(pins); - at91_clock_associate("usart0_clk", &pdev->dev, "usart"); break; case AT91SAM9G45_ID_US1: pdev = &at91sam9g45_uart1_device; configure_usart1_pins(pins); - at91_clock_associate("usart1_clk", &pdev->dev, "usart"); break; case AT91SAM9G45_ID_US2: pdev = &at91sam9g45_uart2_device; configure_usart2_pins(pins); - at91_clock_associate("usart2_clk", &pdev->dev, "usart"); break; case AT91SAM9G45_ID_US3: pdev = &at91sam9g45_uart3_device; configure_usart3_pins(pins); - at91_clock_associate("usart3_clk", &pdev->dev, "usart"); break; default: return; } - pdev->id = portnr; /* update to mapped ID */ + pdata = pdev->dev.platform_data; + pdata->num = portnr; /* update to mapped ID */ if (portnr < ATMEL_MAX_UART) at91_uarts[portnr] = pdev; @@ -1565,8 +1548,10 @@ void __init at91_register_uart(unsigned id, unsigned portnr, unsigned pins) void __init at91_set_serial_console(unsigned portnr) { - if (portnr < ATMEL_MAX_UART) + if (portnr < ATMEL_MAX_UART) { atmel_default_console_device = at91_uarts[portnr]; + at91sam9g45_set_console_clock(portnr); + } } void __init at91_add_device_serial(void) diff --git a/arch/arm/mach-at91/at91sam9rl.c b/arch/arm/mach-at91/at91sam9rl.c index 6a9d24e5ed8e..1a40f16b66c8 100644 --- a/arch/arm/mach-at91/at91sam9rl.c +++ b/arch/arm/mach-at91/at91sam9rl.c @@ -190,6 +190,24 @@ static struct clk *periph_clocks[] __initdata = { // irq0 }; +static struct clk_lookup periph_clocks_lookups[] = { + CLKDEV_CON_DEV_ID("hclk", "atmel_usba_udc.0", &utmi_clk), + CLKDEV_CON_DEV_ID("pclk", "atmel_usba_udc.0", &udphs_clk), + CLKDEV_CON_DEV_ID("t0_clk", "atmel_tcb.0", &tc0_clk), + CLKDEV_CON_DEV_ID("t1_clk", "atmel_tcb.0", &tc1_clk), + CLKDEV_CON_DEV_ID("t2_clk", "atmel_tcb.0", &tc2_clk), + CLKDEV_CON_DEV_ID("pclk", "ssc.0", &ssc0_clk), + CLKDEV_CON_DEV_ID("pclk", "ssc.1", &ssc1_clk), +}; + +static struct clk_lookup usart_clocks_lookups[] = { + CLKDEV_CON_DEV_ID("usart", "atmel_usart.0", &mck), + CLKDEV_CON_DEV_ID("usart", "atmel_usart.1", &usart0_clk), + CLKDEV_CON_DEV_ID("usart", "atmel_usart.2", &usart1_clk), + CLKDEV_CON_DEV_ID("usart", "atmel_usart.3", &usart2_clk), + CLKDEV_CON_DEV_ID("usart", "atmel_usart.4", &usart3_clk), +}; + /* * The two programmable clocks. * You must configure pin multiplexing to bring these signals out. @@ -214,10 +232,27 @@ static void __init at91sam9rl_register_clocks(void) for (i = 0; i < ARRAY_SIZE(periph_clocks); i++) clk_register(periph_clocks[i]); + clkdev_add_table(periph_clocks_lookups, + ARRAY_SIZE(periph_clocks_lookups)); + clkdev_add_table(usart_clocks_lookups, + ARRAY_SIZE(usart_clocks_lookups)); + clk_register(&pck0); clk_register(&pck1); } +static struct clk_lookup console_clock_lookup; + +void __init at91sam9rl_set_console_clock(int id) +{ + if (id >= ARRAY_SIZE(usart_clocks_lookups)) + return; + + console_clock_lookup.con_id = "usart"; + console_clock_lookup.clk = usart_clocks_lookups[id].clk; + clkdev_add(&console_clock_lookup); +} + /* -------------------------------------------------------------------- * GPIO * -------------------------------------------------------------------- */ @@ -252,7 +287,7 @@ static void at91sam9rl_poweroff(void) * AT91SAM9RL processor initialization * -------------------------------------------------------------------- */ -void __init at91sam9rl_initialize(unsigned long main_clock) +void __init at91sam9rl_map_io(void) { unsigned long cidr, sram_size; @@ -275,7 +310,10 @@ void __init at91sam9rl_initialize(unsigned long main_clock) /* Map SRAM */ iotable_init(at91sam9rl_sram_desc, ARRAY_SIZE(at91sam9rl_sram_desc)); +} +void __init at91sam9rl_initialize(unsigned long main_clock) +{ at91_arch_reset = at91sam9_alt_reset; pm_power_off = at91sam9rl_poweroff; at91_extern_irq = (1 << AT91SAM9RL_ID_IRQ0); diff --git a/arch/arm/mach-at91/at91sam9rl_devices.c b/arch/arm/mach-at91/at91sam9rl_devices.c index 53aaa94df75a..c296045f2b6a 100644 --- a/arch/arm/mach-at91/at91sam9rl_devices.c +++ b/arch/arm/mach-at91/at91sam9rl_devices.c @@ -145,7 +145,7 @@ void __init at91_add_device_usba(struct usba_platform_data *data) */ usba_udc_data.pdata.vbus_pin = -EINVAL; usba_udc_data.pdata.num_ep = ARRAY_SIZE(usba_udc_ep); - memcpy(usba_udc_data.ep, usba_udc_ep, sizeof(usba_udc_ep));; + memcpy(usba_udc_data.ep, usba_udc_ep, sizeof(usba_udc_ep)); if (data && data->vbus_pin > 0) { at91_set_gpio_input(data->vbus_pin, 0); @@ -155,10 +155,6 @@ void __init at91_add_device_usba(struct usba_platform_data *data) /* Pullup pin is handled internally by USB device peripheral */ - /* Clocks */ - at91_clock_associate("utmi_clk", &at91_usba_udc_device.dev, "hclk"); - at91_clock_associate("udphs_clk", &at91_usba_udc_device.dev, "pclk"); - platform_device_register(&at91_usba_udc_device); } #else @@ -605,10 +601,6 @@ static struct platform_device at91sam9rl_tcb_device = { static void __init at91_add_device_tc(void) { - /* this chip has a separate clock and irq for each TC channel */ - at91_clock_associate("tc0_clk", &at91sam9rl_tcb_device.dev, "t0_clk"); - at91_clock_associate("tc1_clk", &at91sam9rl_tcb_device.dev, "t1_clk"); - at91_clock_associate("tc2_clk", &at91sam9rl_tcb_device.dev, "t2_clk"); platform_device_register(&at91sam9rl_tcb_device); } #else @@ -892,12 +884,10 @@ void __init at91_add_device_ssc(unsigned id, unsigned pins) case AT91SAM9RL_ID_SSC0: pdev = &at91sam9rl_ssc0_device; configure_ssc0_pins(pins); - at91_clock_associate("ssc0_clk", &pdev->dev, "pclk"); break; case AT91SAM9RL_ID_SSC1: pdev = &at91sam9rl_ssc1_device; configure_ssc1_pins(pins); - at91_clock_associate("ssc1_clk", &pdev->dev, "pclk"); break; default: return; @@ -1141,37 +1131,34 @@ struct platform_device *atmel_default_console_device; /* the serial console devi void __init at91_register_uart(unsigned id, unsigned portnr, unsigned pins) { struct platform_device *pdev; + struct atmel_uart_data *pdata; switch (id) { case 0: /* DBGU */ pdev = &at91sam9rl_dbgu_device; configure_dbgu_pins(); - at91_clock_associate("mck", &pdev->dev, "usart"); break; case AT91SAM9RL_ID_US0: pdev = &at91sam9rl_uart0_device; configure_usart0_pins(pins); - at91_clock_associate("usart0_clk", &pdev->dev, "usart"); break; case AT91SAM9RL_ID_US1: pdev = &at91sam9rl_uart1_device; configure_usart1_pins(pins); - at91_clock_associate("usart1_clk", &pdev->dev, "usart"); break; case AT91SAM9RL_ID_US2: pdev = &at91sam9rl_uart2_device; configure_usart2_pins(pins); - at91_clock_associate("usart2_clk", &pdev->dev, "usart"); break; case AT91SAM9RL_ID_US3: pdev = &at91sam9rl_uart3_device; configure_usart3_pins(pins); - at91_clock_associate("usart3_clk", &pdev->dev, "usart"); break; default: return; } - pdev->id = portnr; /* update to mapped ID */ + pdata = pdev->dev.platform_data; + pdata->num = portnr; /* update to mapped ID */ if (portnr < ATMEL_MAX_UART) at91_uarts[portnr] = pdev; @@ -1179,8 +1166,10 @@ void __init at91_register_uart(unsigned id, unsigned portnr, unsigned pins) void __init at91_set_serial_console(unsigned portnr) { - if (portnr < ATMEL_MAX_UART) + if (portnr < ATMEL_MAX_UART) { atmel_default_console_device = at91_uarts[portnr]; + at91sam9rl_set_console_clock(portnr); + } } void __init at91_add_device_serial(void) diff --git a/arch/arm/mach-at91/at91x40.c b/arch/arm/mach-at91/at91x40.c index ad3ec85b2790..56ba3bd035ae 100644 --- a/arch/arm/mach-at91/at91x40.c +++ b/arch/arm/mach-at91/at91x40.c @@ -37,11 +37,6 @@ unsigned long clk_get_rate(struct clk *clk) return AT91X40_MASTER_CLOCK; } -struct clk *clk_get(struct device *dev, const char *id) -{ - return NULL; -} - void __init at91x40_initialize(unsigned long main_clock) { at91_extern_irq = (1 << AT91X40_ID_IRQ0) | (1 << AT91X40_ID_IRQ1) diff --git a/arch/arm/mach-at91/board-1arm.c b/arch/arm/mach-at91/board-1arm.c index 8a3fc84847c1..ab1d463aa47d 100644 --- a/arch/arm/mach-at91/board-1arm.c +++ b/arch/arm/mach-at91/board-1arm.c @@ -35,14 +35,18 @@ #include #include +#include #include "generic.h" -static void __init onearm_map_io(void) +static void __init onearm_init_early(void) { + /* Set cpu type: PQFP */ + at91rm9200_set_type(ARCH_REVISON_9200_PQFP); + /* Initialize processor: 18.432 MHz crystal */ - at91rm9200_initialize(18432000, AT91RM9200_PQFP); + at91rm9200_initialize(18432000); /* DBGU on ttyS0. (Rx & Tx only) */ at91_register_uart(0, 0, 0); @@ -92,9 +96,9 @@ static void __init onearm_board_init(void) MACHINE_START(ONEARM, "Ajeco 1ARM single board computer") /* Maintainer: Lennert Buytenhek */ - .boot_params = AT91_SDRAM_BASE + 0x100, .timer = &at91rm9200_timer, - .map_io = onearm_map_io, + .map_io = at91rm9200_map_io, + .init_early = onearm_init_early, .init_irq = onearm_init_irq, .init_machine = onearm_board_init, MACHINE_END diff --git a/arch/arm/mach-at91/board-afeb-9260v1.c b/arch/arm/mach-at91/board-afeb-9260v1.c index cba7f7771fee..a4924de48c36 100644 --- a/arch/arm/mach-at91/board-afeb-9260v1.c +++ b/arch/arm/mach-at91/board-afeb-9260v1.c @@ -48,7 +48,7 @@ #include "generic.h" -static void __init afeb9260_map_io(void) +static void __init afeb9260_init_early(void) { /* Initialize processor: 18.432 MHz crystal */ at91sam9260_initialize(18432000); @@ -218,9 +218,9 @@ static void __init afeb9260_board_init(void) MACHINE_START(AFEB9260, "Custom afeb9260 board") /* Maintainer: Sergey Lapin */ - .boot_params = AT91_SDRAM_BASE + 0x100, .timer = &at91sam926x_timer, - .map_io = afeb9260_map_io, + .map_io = at91sam9260_map_io, + .init_early = afeb9260_init_early, .init_irq = afeb9260_init_irq, .init_machine = afeb9260_board_init, MACHINE_END diff --git a/arch/arm/mach-at91/board-at572d940hf_ek.c b/arch/arm/mach-at91/board-at572d940hf_ek.c deleted file mode 100644 index 3929f1c9e4e5..000000000000 --- a/arch/arm/mach-at91/board-at572d940hf_ek.c +++ /dev/null @@ -1,326 +0,0 @@ -/* - * linux/arch/arm/mach-at91/board-at572d940hf_ek.c - * - * Copyright (C) 2008 Atmel Antonio R. Costa - * Copyright (C) 2005 SAN People - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include - -#include "sam9_smc.h" -#include "generic.h" - - -static void __init eb_map_io(void) -{ - /* Initialize processor: 12.500 MHz crystal */ - at572d940hf_initialize(12000000); - - /* DBGU on ttyS0. (Rx & Tx only) */ - at91_register_uart(0, 0, 0); - - /* USART0 on ttyS1. (Rx & Tx only) */ - at91_register_uart(AT572D940HF_ID_US0, 1, 0); - - /* USART1 on ttyS2. (Rx & Tx only) */ - at91_register_uart(AT572D940HF_ID_US1, 2, 0); - - /* USART2 on ttyS3. (Tx & Rx only */ - at91_register_uart(AT572D940HF_ID_US2, 3, 0); - - /* set serial console to ttyS0 (ie, DBGU) */ - at91_set_serial_console(0); -} - -static void __init eb_init_irq(void) -{ - at572d940hf_init_interrupts(NULL); -} - - -/* - * USB Host Port - */ -static struct at91_usbh_data __initdata eb_usbh_data = { - .ports = 2, -}; - - -/* - * USB Device Port - */ -static struct at91_udc_data __initdata eb_udc_data = { - .vbus_pin = 0, /* no VBUS detection,UDC always on */ - .pullup_pin = 0, /* pull-up driven by UDC */ -}; - - -/* - * MCI (SD/MMC) - */ -static struct at91_mmc_data __initdata eb_mmc_data = { - .wire4 = 1, -/* .det_pin = ... not connected */ -/* .wp_pin = ... not connected */ -/* .vcc_pin = ... not connected */ -}; - - -/* - * MACB Ethernet device - */ -static struct at91_eth_data __initdata eb_eth_data = { - .phy_irq_pin = AT91_PIN_PB25, - .is_rmii = 1, -}; - -/* - * NOR flash - */ - -static struct mtd_partition eb_nor_partitions[] = { - { - .name = "Raw Environment", - .offset = 0, - .size = SZ_4M, - .mask_flags = 0, - }, - { - .name = "OS FS", - .offset = MTDPART_OFS_APPEND, - .size = 3 * SZ_1M, - .mask_flags = 0, - }, - { - .name = "APP FS", - .offset = MTDPART_OFS_APPEND, - .size = MTDPART_SIZ_FULL, - .mask_flags = 0, - }, -}; - -static void nor_flash_set_vpp(struct map_info* mi, int i) { -}; - -static struct physmap_flash_data nor_flash_data = { - .width = 4, - .parts = eb_nor_partitions, - .nr_parts = ARRAY_SIZE(eb_nor_partitions), - .set_vpp = nor_flash_set_vpp, -}; - -static struct resource nor_flash_resources[] = { - { - .start = AT91_CHIPSELECT_0, - .end = AT91_CHIPSELECT_0 + SZ_16M - 1, - .flags = IORESOURCE_MEM, - }, -}; - -static struct platform_device nor_flash = { - .name = "physmap-flash", - .id = 0, - .dev = { - .platform_data = &nor_flash_data, - }, - .resource = nor_flash_resources, - .num_resources = ARRAY_SIZE(nor_flash_resources), -}; - -static struct sam9_smc_config __initdata eb_nor_smc_config = { - .ncs_read_setup = 1, - .nrd_setup = 1, - .ncs_write_setup = 1, - .nwe_setup = 1, - - .ncs_read_pulse = 7, - .nrd_pulse = 7, - .ncs_write_pulse = 7, - .nwe_pulse = 7, - - .read_cycle = 9, - .write_cycle = 9, - - .mode = AT91_SMC_READMODE | AT91_SMC_WRITEMODE | AT91_SMC_EXNWMODE_DISABLE | AT91_SMC_BAT_WRITE | AT91_SMC_DBW_32, - .tdf_cycles = 1, -}; - -static void __init eb_add_device_nor(void) -{ - /* configure chip-select 0 (NOR) */ - sam9_smc_configure(0, &eb_nor_smc_config); - platform_device_register(&nor_flash); -} - -/* - * NAND flash - */ -static struct mtd_partition __initdata eb_nand_partition[] = { - { - .name = "Partition 1", - .offset = 0, - .size = SZ_16M, - }, - { - .name = "Partition 2", - .offset = MTDPART_OFS_NXTBLK, - .size = MTDPART_SIZ_FULL, - } -}; - -static struct mtd_partition * __init nand_partitions(int size, int *num_partitions) -{ - *num_partitions = ARRAY_SIZE(eb_nand_partition); - return eb_nand_partition; -} - -static struct atmel_nand_data __initdata eb_nand_data = { - .ale = 22, - .cle = 21, -/* .det_pin = ... not connected */ -/* .rdy_pin = AT91_PIN_PC16, */ - .enable_pin = AT91_PIN_PA15, - .partition_info = nand_partitions, -#if defined(CONFIG_MTD_NAND_ATMEL_BUSWIDTH_16) - .bus_width_16 = 1, -#else - .bus_width_16 = 0, -#endif -}; - -static struct sam9_smc_config __initdata eb_nand_smc_config = { - .ncs_read_setup = 0, - .nrd_setup = 0, - .ncs_write_setup = 1, - .nwe_setup = 1, - - .ncs_read_pulse = 3, - .nrd_pulse = 3, - .ncs_write_pulse = 3, - .nwe_pulse = 3, - - .read_cycle = 5, - .write_cycle = 5, - - .mode = AT91_SMC_READMODE | AT91_SMC_WRITEMODE | AT91_SMC_EXNWMODE_DISABLE, - .tdf_cycles = 12, -}; - -static void __init eb_add_device_nand(void) -{ - /* setup bus-width (8 or 16) */ - if (eb_nand_data.bus_width_16) - eb_nand_smc_config.mode |= AT91_SMC_DBW_16; - else - eb_nand_smc_config.mode |= AT91_SMC_DBW_8; - - /* configure chip-select 3 (NAND) */ - sam9_smc_configure(3, &eb_nand_smc_config); - - at91_add_device_nand(&eb_nand_data); -} - - -/* - * SPI devices - */ -static struct resource rtc_resources[] = { - [0] = { - .start = AT572D940HF_ID_IRQ1, - .end = AT572D940HF_ID_IRQ1, - .flags = IORESOURCE_IRQ, - }, -}; - -static struct ds1305_platform_data ds1306_data = { - .is_ds1306 = true, - .en_1hz = false, -}; - -static struct spi_board_info eb_spi_devices[] = { - { /* RTC Dallas DS1306 */ - .modalias = "rtc-ds1305", - .chip_select = 3, - .mode = SPI_CS_HIGH | SPI_CPOL | SPI_CPHA, - .max_speed_hz = 500000, - .bus_num = 0, - .irq = AT572D940HF_ID_IRQ1, - .platform_data = (void *) &ds1306_data, - }, -#if defined(CONFIG_MTD_AT91_DATAFLASH_CARD) - { /* Dataflash card */ - .modalias = "mtd_dataflash", - .chip_select = 0, - .max_speed_hz = 15 * 1000 * 1000, - .bus_num = 0, - }, -#endif -}; - -static void __init eb_board_init(void) -{ - /* Serial */ - at91_add_device_serial(); - /* USB Host */ - at91_add_device_usbh(&eb_usbh_data); - /* USB Device */ - at91_add_device_udc(&eb_udc_data); - /* I2C */ - at91_add_device_i2c(NULL, 0); - /* NOR */ - eb_add_device_nor(); - /* NAND */ - eb_add_device_nand(); - /* SPI */ - at91_add_device_spi(eb_spi_devices, ARRAY_SIZE(eb_spi_devices)); - /* MMC */ - at91_add_device_mmc(0, &eb_mmc_data); - /* Ethernet */ - at91_add_device_eth(&eb_eth_data); - /* mAgic */ - at91_add_device_mAgic(); -} - -MACHINE_START(AT572D940HFEB, "Atmel AT91D940HF-EB") - /* Maintainer: Atmel */ - .boot_params = AT91_SDRAM_BASE + 0x100, - .timer = &at91sam926x_timer, - .map_io = eb_map_io, - .init_irq = eb_init_irq, - .init_machine = eb_board_init, -MACHINE_END diff --git a/arch/arm/mach-at91/board-cam60.c b/arch/arm/mach-at91/board-cam60.c index b54e3e6fceb6..148fccb9a25a 100644 --- a/arch/arm/mach-at91/board-cam60.c +++ b/arch/arm/mach-at91/board-cam60.c @@ -45,7 +45,7 @@ #include "generic.h" -static void __init cam60_map_io(void) +static void __init cam60_init_early(void) { /* Initialize processor: 10 MHz crystal */ at91sam9260_initialize(10000000); @@ -198,9 +198,9 @@ static void __init cam60_board_init(void) MACHINE_START(CAM60, "KwikByte CAM60") /* Maintainer: KwikByte */ - .boot_params = AT91_SDRAM_BASE + 0x100, .timer = &at91sam926x_timer, - .map_io = cam60_map_io, + .map_io = at91sam9260_map_io, + .init_early = cam60_init_early, .init_irq = cam60_init_irq, .init_machine = cam60_board_init, MACHINE_END diff --git a/arch/arm/mach-at91/board-cap9adk.c b/arch/arm/mach-at91/board-cap9adk.c index e7274440ead9..1904fdf87613 100644 --- a/arch/arm/mach-at91/board-cap9adk.c +++ b/arch/arm/mach-at91/board-cap9adk.c @@ -44,12 +44,13 @@ #include #include #include +#include #include "sam9_smc.h" #include "generic.h" -static void __init cap9adk_map_io(void) +static void __init cap9adk_init_early(void) { /* Initialize processor: 12 MHz crystal */ at91cap9_initialize(12000000); @@ -187,11 +188,6 @@ static struct atmel_nand_data __initdata cap9adk_nand_data = { // .rdy_pin = ... not connected .enable_pin = AT91_PIN_PD15, .partition_info = nand_partitions, -#if defined(CONFIG_MTD_NAND_ATMEL_BUSWIDTH_16) - .bus_width_16 = 1, -#else - .bus_width_16 = 0, -#endif }; static struct sam9_smc_config __initdata cap9adk_nand_smc_config = { @@ -219,6 +215,7 @@ static void __init cap9adk_add_device_nand(void) csa = at91_sys_read(AT91_MATRIX_EBICSA); at91_sys_write(AT91_MATRIX_EBICSA, csa | AT91_MATRIX_EBI_VDDIOMSEL_3_3V); + cap9adk_nand_data.bus_width_16 = !board_have_nand_8bit(); /* setup bus-width (8 or 16) */ if (cap9adk_nand_data.bus_width_16) cap9adk_nand_smc_config.mode |= AT91_SMC_DBW_16; @@ -399,9 +396,9 @@ static void __init cap9adk_board_init(void) MACHINE_START(AT91CAP9ADK, "Atmel AT91CAP9A-DK") /* Maintainer: Stelian Pop */ - .boot_params = AT91_SDRAM_BASE + 0x100, .timer = &at91sam926x_timer, - .map_io = cap9adk_map_io, + .map_io = at91cap9_map_io, + .init_early = cap9adk_init_early, .init_irq = cap9adk_init_irq, .init_machine = cap9adk_board_init, MACHINE_END diff --git a/arch/arm/mach-at91/board-carmeva.c b/arch/arm/mach-at91/board-carmeva.c index 295e1e77fa60..f36b18687494 100644 --- a/arch/arm/mach-at91/board-carmeva.c +++ b/arch/arm/mach-at91/board-carmeva.c @@ -40,10 +40,10 @@ #include "generic.h" -static void __init carmeva_map_io(void) +static void __init carmeva_init_early(void) { /* Initialize processor: 20.000 MHz crystal */ - at91rm9200_initialize(20000000, AT91RM9200_BGA); + at91rm9200_initialize(20000000); /* DBGU on ttyS0. (Rx & Tx only) */ at91_register_uart(0, 0, 0); @@ -162,9 +162,9 @@ static void __init carmeva_board_init(void) MACHINE_START(CARMEVA, "Carmeva") /* Maintainer: Conitec Datasystems */ - .boot_params = AT91_SDRAM_BASE + 0x100, .timer = &at91rm9200_timer, - .map_io = carmeva_map_io, + .map_io = at91rm9200_map_io, + .init_early = carmeva_init_early, .init_irq = carmeva_init_irq, .init_machine = carmeva_board_init, MACHINE_END diff --git a/arch/arm/mach-at91/board-cpu9krea.c b/arch/arm/mach-at91/board-cpu9krea.c index 3838594578f3..980511084fe4 100644 --- a/arch/arm/mach-at91/board-cpu9krea.c +++ b/arch/arm/mach-at91/board-cpu9krea.c @@ -47,7 +47,7 @@ #include "sam9_smc.h" #include "generic.h" -static void __init cpu9krea_map_io(void) +static void __init cpu9krea_init_early(void) { /* Initialize processor: 18.432 MHz crystal */ at91sam9260_initialize(18432000); @@ -375,9 +375,9 @@ MACHINE_START(CPUAT9260, "Eukrea CPU9260") MACHINE_START(CPUAT9G20, "Eukrea CPU9G20") #endif /* Maintainer: Eric Benard - EUKREA Electromatique */ - .boot_params = AT91_SDRAM_BASE + 0x100, .timer = &at91sam926x_timer, - .map_io = cpu9krea_map_io, + .map_io = at91sam9260_map_io, + .init_early = cpu9krea_init_early, .init_irq = cpu9krea_init_irq, .init_machine = cpu9krea_board_init, MACHINE_END diff --git a/arch/arm/mach-at91/board-cpuat91.c b/arch/arm/mach-at91/board-cpuat91.c index 2f4dd8cdd484..6daabe3907a1 100644 --- a/arch/arm/mach-at91/board-cpuat91.c +++ b/arch/arm/mach-at91/board-cpuat91.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "generic.h" @@ -50,10 +51,13 @@ static struct gpio_led cpuat91_leds[] = { }, }; -static void __init cpuat91_map_io(void) +static void __init cpuat91_init_early(void) { + /* Set cpu type: PQFP */ + at91rm9200_set_type(ARCH_REVISON_9200_PQFP); + /* Initialize processor: 18.432 MHz crystal */ - at91rm9200_initialize(18432000, AT91RM9200_PQFP); + at91rm9200_initialize(18432000); /* DBGU on ttyS0. (Rx & Tx only) */ at91_register_uart(0, 0, 0); @@ -175,9 +179,9 @@ static void __init cpuat91_board_init(void) MACHINE_START(CPUAT91, "Eukrea") /* Maintainer: Eric Benard - EUKREA Electromatique */ - .boot_params = AT91_SDRAM_BASE + 0x100, .timer = &at91rm9200_timer, - .map_io = cpuat91_map_io, + .map_io = at91rm9200_map_io, + .init_early = cpuat91_init_early, .init_irq = cpuat91_init_irq, .init_machine = cpuat91_board_init, MACHINE_END diff --git a/arch/arm/mach-at91/board-csb337.c b/arch/arm/mach-at91/board-csb337.c index 464839dc39bd..d98bcec1dfe0 100644 --- a/arch/arm/mach-at91/board-csb337.c +++ b/arch/arm/mach-at91/board-csb337.c @@ -43,10 +43,10 @@ #include "generic.h" -static void __init csb337_map_io(void) +static void __init csb337_init_early(void) { /* Initialize processor: 3.6864 MHz crystal */ - at91rm9200_initialize(3686400, AT91RM9200_BGA); + at91rm9200_initialize(3686400); /* Setup the LEDs */ at91_init_leds(AT91_PIN_PB0, AT91_PIN_PB1); @@ -257,9 +257,9 @@ static void __init csb337_board_init(void) MACHINE_START(CSB337, "Cogent CSB337") /* Maintainer: Bill Gatliff */ - .boot_params = AT91_SDRAM_BASE + 0x100, .timer = &at91rm9200_timer, - .map_io = csb337_map_io, + .map_io = at91rm9200_map_io, + .init_early = csb337_init_early, .init_irq = csb337_init_irq, .init_machine = csb337_board_init, MACHINE_END diff --git a/arch/arm/mach-at91/board-csb637.c b/arch/arm/mach-at91/board-csb637.c index 431688c61412..019aab4e20b0 100644 --- a/arch/arm/mach-at91/board-csb637.c +++ b/arch/arm/mach-at91/board-csb637.c @@ -40,10 +40,10 @@ #include "generic.h" -static void __init csb637_map_io(void) +static void __init csb637_init_early(void) { /* Initialize processor: 3.6864 MHz crystal */ - at91rm9200_initialize(3686400, AT91RM9200_BGA); + at91rm9200_initialize(3686400); /* DBGU on ttyS0. (Rx & Tx only) */ at91_register_uart(0, 0, 0); @@ -138,9 +138,9 @@ static void __init csb637_board_init(void) MACHINE_START(CSB637, "Cogent CSB637") /* Maintainer: Bill Gatliff */ - .boot_params = AT91_SDRAM_BASE + 0x100, .timer = &at91rm9200_timer, - .map_io = csb637_map_io, + .map_io = at91rm9200_map_io, + .init_early = csb637_init_early, .init_irq = csb637_init_irq, .init_machine = csb637_board_init, MACHINE_END diff --git a/arch/arm/mach-at91/board-eb01.c b/arch/arm/mach-at91/board-eb01.c index 1f9d3cb64c50..d2023f27c652 100644 --- a/arch/arm/mach-at91/board-eb01.c +++ b/arch/arm/mach-at91/board-eb01.c @@ -30,7 +30,12 @@ #include #include "generic.h" -static void __init at91eb01_map_io(void) +static void __init at91eb01_init_irq(void) +{ + at91x40_init_interrupts(NULL); +} + +static void __init at91eb01_init_early(void) { at91x40_initialize(40000000); } @@ -38,7 +43,7 @@ static void __init at91eb01_map_io(void) MACHINE_START(AT91EB01, "Atmel AT91 EB01") /* Maintainer: Greg Ungerer */ .timer = &at91x40_timer, - .init_irq = at91x40_init_interrupts, - .map_io = at91eb01_map_io, + .init_early = at91eb01_init_early, + .init_irq = at91eb01_init_irq, MACHINE_END diff --git a/arch/arm/mach-at91/board-eb9200.c b/arch/arm/mach-at91/board-eb9200.c index 6cf6566ae346..e9484535cbc8 100644 --- a/arch/arm/mach-at91/board-eb9200.c +++ b/arch/arm/mach-at91/board-eb9200.c @@ -40,10 +40,10 @@ #include "generic.h" -static void __init eb9200_map_io(void) +static void __init eb9200_init_early(void) { /* Initialize processor: 18.432 MHz crystal */ - at91rm9200_initialize(18432000, AT91RM9200_BGA); + at91rm9200_initialize(18432000); /* DBGU on ttyS0. (Rx & Tx only) */ at91_register_uart(0, 0, 0); @@ -120,9 +120,9 @@ static void __init eb9200_board_init(void) } MACHINE_START(ATEB9200, "Embest ATEB9200") - .boot_params = AT91_SDRAM_BASE + 0x100, .timer = &at91rm9200_timer, - .map_io = eb9200_map_io, + .map_io = at91rm9200_map_io, + .init_early = eb9200_init_early, .init_irq = eb9200_init_irq, .init_machine = eb9200_board_init, MACHINE_END diff --git a/arch/arm/mach-at91/board-ecbat91.c b/arch/arm/mach-at91/board-ecbat91.c index de2fd04e7c8a..a6f57faa10a7 100644 --- a/arch/arm/mach-at91/board-ecbat91.c +++ b/arch/arm/mach-at91/board-ecbat91.c @@ -38,14 +38,18 @@ #include #include +#include #include "generic.h" -static void __init ecb_at91map_io(void) +static void __init ecb_at91init_early(void) { + /* Set cpu type: PQFP */ + at91rm9200_set_type(ARCH_REVISON_9200_PQFP); + /* Initialize processor: 18.432 MHz crystal */ - at91rm9200_initialize(18432000, AT91RM9200_PQFP); + at91rm9200_initialize(18432000); /* Setup the LEDs */ at91_init_leds(AT91_PIN_PC7, AT91_PIN_PC7); @@ -168,9 +172,9 @@ static void __init ecb_at91board_init(void) MACHINE_START(ECBAT91, "emQbit's ECB_AT91") /* Maintainer: emQbit.com */ - .boot_params = AT91_SDRAM_BASE + 0x100, .timer = &at91rm9200_timer, - .map_io = ecb_at91map_io, + .map_io = at91rm9200_map_io, + .init_early = ecb_at91init_early, .init_irq = ecb_at91init_irq, .init_machine = ecb_at91board_init, MACHINE_END diff --git a/arch/arm/mach-at91/board-eco920.c b/arch/arm/mach-at91/board-eco920.c index a158a0ce458f..bfc0062d1483 100644 --- a/arch/arm/mach-at91/board-eco920.c +++ b/arch/arm/mach-at91/board-eco920.c @@ -26,11 +26,16 @@ #include #include +#include + #include "generic.h" -static void __init eco920_map_io(void) +static void __init eco920_init_early(void) { - at91rm9200_initialize(18432000, AT91RM9200_PQFP); + /* Set cpu type: PQFP */ + at91rm9200_set_type(ARCH_REVISON_9200_PQFP); + + at91rm9200_initialize(18432000); /* Setup the LEDs */ at91_init_leds(AT91_PIN_PB0, AT91_PIN_PB1); @@ -86,21 +91,6 @@ static struct platform_device eco920_flash = { .num_resources = 1, }; -static struct resource at91_beeper_resources[] = { - [0] = { - .start = AT91RM9200_BASE_TC3, - .end = AT91RM9200_BASE_TC3 + 0x39, - .flags = IORESOURCE_MEM, - }, -}; - -static struct platform_device at91_beeper = { - .name = "at91_beeper", - .id = 0, - .resource = at91_beeper_resources, - .num_resources = ARRAY_SIZE(at91_beeper_resources), -}; - static struct spi_board_info eco920_spi_devices[] = { { /* CAN controller */ .modalias = "tlv5638", @@ -139,18 +129,14 @@ static void __init eco920_board_init(void) AT91_SMC_TDF_(1) /* float time */ ); - at91_clock_associate("tc3_clk", &at91_beeper.dev, "at91_beeper"); - at91_set_B_periph(AT91_PIN_PB6, 0); - platform_device_register(&at91_beeper); - at91_add_device_spi(eco920_spi_devices, ARRAY_SIZE(eco920_spi_devices)); } MACHINE_START(ECO920, "eco920") /* Maintainer: Sascha Hauer */ - .boot_params = AT91_SDRAM_BASE + 0x100, .timer = &at91rm9200_timer, - .map_io = eco920_map_io, + .map_io = at91rm9200_map_io, + .init_early = eco920_init_early, .init_irq = eco920_init_irq, .init_machine = eco920_board_init, MACHINE_END diff --git a/arch/arm/mach-at91/board-flexibity.c b/arch/arm/mach-at91/board-flexibity.c index c8a62dc8fa65..466c063b8d21 100644 --- a/arch/arm/mach-at91/board-flexibity.c +++ b/arch/arm/mach-at91/board-flexibity.c @@ -37,7 +37,7 @@ #include "generic.h" -static void __init flexibity_map_io(void) +static void __init flexibity_init_early(void) { /* Initialize processor: 18.432 MHz crystal */ at91sam9260_initialize(18432000); @@ -154,9 +154,9 @@ static void __init flexibity_board_init(void) MACHINE_START(FLEXIBITY, "Flexibity Connect") /* Maintainer: Maxim Osipov */ - .boot_params = AT91_SDRAM_BASE + 0x100, .timer = &at91sam926x_timer, - .map_io = flexibity_map_io, + .map_io = at91sam9260_map_io, + .init_early = flexibity_init_early, .init_irq = flexibity_init_irq, .init_machine = flexibity_board_init, MACHINE_END diff --git a/arch/arm/mach-at91/board-foxg20.c b/arch/arm/mach-at91/board-foxg20.c index dfc7dfe738e4..e2d1dc9eff45 100644 --- a/arch/arm/mach-at91/board-foxg20.c +++ b/arch/arm/mach-at91/board-foxg20.c @@ -57,7 +57,7 @@ */ -static void __init foxg20_map_io(void) +static void __init foxg20_init_early(void) { /* Initialize processor: 18.432 MHz crystal */ at91sam9260_initialize(18432000); @@ -266,9 +266,9 @@ static void __init foxg20_board_init(void) MACHINE_START(ACMENETUSFOXG20, "Acme Systems srl FOX Board G20") /* Maintainer: Sergio Tanzilli */ - .boot_params = AT91_SDRAM_BASE + 0x100, .timer = &at91sam926x_timer, - .map_io = foxg20_map_io, + .map_io = at91sam9260_map_io, + .init_early = foxg20_init_early, .init_irq = foxg20_init_irq, .init_machine = foxg20_board_init, MACHINE_END diff --git a/arch/arm/mach-at91/board-gsia18s.c b/arch/arm/mach-at91/board-gsia18s.c index bc28136ee249..1d4f36b3cb27 100644 --- a/arch/arm/mach-at91/board-gsia18s.c +++ b/arch/arm/mach-at91/board-gsia18s.c @@ -38,9 +38,9 @@ #include "sam9_smc.h" #include "generic.h" -static void __init gsia18s_map_io(void) +static void __init gsia18s_init_early(void) { - stamp9g20_map_io(); + stamp9g20_init_early(); /* * USART0 on ttyS1 (Rx, Tx, CTS, RTS, DTR, DSR, DCD, RI). @@ -576,9 +576,9 @@ static void __init gsia18s_board_init(void) } MACHINE_START(GSIA18S, "GS_IA18_S") - .boot_params = AT91_SDRAM_BASE + 0x100, .timer = &at91sam926x_timer, - .map_io = gsia18s_map_io, + .map_io = at91sam9260_map_io, + .init_early = gsia18s_init_early, .init_irq = init_irq, .init_machine = gsia18s_board_init, MACHINE_END diff --git a/arch/arm/mach-at91/board-kafa.c b/arch/arm/mach-at91/board-kafa.c index d2e1f4ec1fcc..9b003ff744ba 100644 --- a/arch/arm/mach-at91/board-kafa.c +++ b/arch/arm/mach-at91/board-kafa.c @@ -35,14 +35,18 @@ #include #include +#include #include "generic.h" -static void __init kafa_map_io(void) +static void __init kafa_init_early(void) { + /* Set cpu type: PQFP */ + at91rm9200_set_type(ARCH_REVISON_9200_PQFP); + /* Initialize processor: 18.432 MHz crystal */ - at91rm9200_initialize(18432000, AT91RM9200_PQFP); + at91rm9200_initialize(18432000); /* Set up the LEDs */ at91_init_leds(AT91_PIN_PB4, AT91_PIN_PB4); @@ -94,9 +98,9 @@ static void __init kafa_board_init(void) MACHINE_START(KAFA, "Sperry-Sun KAFA") /* Maintainer: Sergei Sharonov */ - .boot_params = AT91_SDRAM_BASE + 0x100, .timer = &at91rm9200_timer, - .map_io = kafa_map_io, + .map_io = at91rm9200_map_io, + .init_early = kafa_init_early, .init_irq = kafa_init_irq, .init_machine = kafa_board_init, MACHINE_END diff --git a/arch/arm/mach-at91/board-kb9202.c b/arch/arm/mach-at91/board-kb9202.c index a13d2063faff..a813a74b65f9 100644 --- a/arch/arm/mach-at91/board-kb9202.c +++ b/arch/arm/mach-at91/board-kb9202.c @@ -36,16 +36,19 @@ #include #include - +#include #include #include "generic.h" -static void __init kb9202_map_io(void) +static void __init kb9202_init_early(void) { + /* Set cpu type: PQFP */ + at91rm9200_set_type(ARCH_REVISON_9200_PQFP); + /* Initialize processor: 10 MHz crystal */ - at91rm9200_initialize(10000000, AT91RM9200_PQFP); + at91rm9200_initialize(10000000); /* Set up the LEDs */ at91_init_leds(AT91_PIN_PC19, AT91_PIN_PC18); @@ -136,9 +139,9 @@ static void __init kb9202_board_init(void) MACHINE_START(KB9200, "KB920x") /* Maintainer: KwikByte, Inc. */ - .boot_params = AT91_SDRAM_BASE + 0x100, .timer = &at91rm9200_timer, - .map_io = kb9202_map_io, + .map_io = at91rm9200_map_io, + .init_early = kb9202_init_early, .init_irq = kb9202_init_irq, .init_machine = kb9202_board_init, MACHINE_END diff --git a/arch/arm/mach-at91/board-neocore926.c b/arch/arm/mach-at91/board-neocore926.c index fe5f1d47e6e2..961e805db68c 100644 --- a/arch/arm/mach-at91/board-neocore926.c +++ b/arch/arm/mach-at91/board-neocore926.c @@ -51,7 +51,7 @@ #include "generic.h" -static void __init neocore926_map_io(void) +static void __init neocore926_init_early(void) { /* Initialize processor: 20 MHz crystal */ at91sam9263_initialize(20000000); @@ -387,9 +387,9 @@ static void __init neocore926_board_init(void) MACHINE_START(NEOCORE926, "ADENEO NEOCORE 926") /* Maintainer: ADENEO */ - .boot_params = AT91_SDRAM_BASE + 0x100, .timer = &at91sam926x_timer, - .map_io = neocore926_map_io, + .map_io = at91sam9263_map_io, + .init_early = neocore926_init_early, .init_irq = neocore926_init_irq, .init_machine = neocore926_board_init, MACHINE_END diff --git a/arch/arm/mach-at91/board-pcontrol-g20.c b/arch/arm/mach-at91/board-pcontrol-g20.c index feb65787c30b..21a21af25878 100644 --- a/arch/arm/mach-at91/board-pcontrol-g20.c +++ b/arch/arm/mach-at91/board-pcontrol-g20.c @@ -37,9 +37,9 @@ #include "generic.h" -static void __init pcontrol_g20_map_io(void) +static void __init pcontrol_g20_init_early(void) { - stamp9g20_map_io(); + stamp9g20_init_early(); /* USART0 on ttyS1. (Rx, Tx, CTS, RTS) piggyback A2 */ at91_register_uart(AT91SAM9260_ID_US0, 1, ATMEL_UART_CTS @@ -222,9 +222,9 @@ static void __init pcontrol_g20_board_init(void) MACHINE_START(PCONTROL_G20, "PControl G20") /* Maintainer: pgsellmann@portner-elektronik.at */ - .boot_params = AT91_SDRAM_BASE + 0x100, .timer = &at91sam926x_timer, - .map_io = pcontrol_g20_map_io, + .map_io = at91sam9260_map_io, + .init_early = pcontrol_g20_init_early, .init_irq = init_irq, .init_machine = pcontrol_g20_board_init, MACHINE_END diff --git a/arch/arm/mach-at91/board-picotux200.c b/arch/arm/mach-at91/board-picotux200.c index 55dad3a46547..756cc2a745dd 100644 --- a/arch/arm/mach-at91/board-picotux200.c +++ b/arch/arm/mach-at91/board-picotux200.c @@ -43,10 +43,10 @@ #include "generic.h" -static void __init picotux200_map_io(void) +static void __init picotux200_init_early(void) { /* Initialize processor: 18.432 MHz crystal */ - at91rm9200_initialize(18432000, AT91RM9200_BGA); + at91rm9200_initialize(18432000); /* DBGU on ttyS0. (Rx & Tx only) */ at91_register_uart(0, 0, 0); @@ -123,9 +123,9 @@ static void __init picotux200_board_init(void) MACHINE_START(PICOTUX2XX, "picotux 200") /* Maintainer: Kleinhenz Elektronik GmbH */ - .boot_params = AT91_SDRAM_BASE + 0x100, .timer = &at91rm9200_timer, - .map_io = picotux200_map_io, + .map_io = at91rm9200_map_io, + .init_early = picotux200_init_early, .init_irq = picotux200_init_irq, .init_machine = picotux200_board_init, MACHINE_END diff --git a/arch/arm/mach-at91/board-qil-a9260.c b/arch/arm/mach-at91/board-qil-a9260.c index 69d15a875b66..d1a6001b0bd8 100644 --- a/arch/arm/mach-at91/board-qil-a9260.c +++ b/arch/arm/mach-at91/board-qil-a9260.c @@ -48,7 +48,7 @@ #include "generic.h" -static void __init ek_map_io(void) +static void __init ek_init_early(void) { /* Initialize processor: 12.000 MHz crystal */ at91sam9260_initialize(12000000); @@ -268,9 +268,9 @@ static void __init ek_board_init(void) MACHINE_START(QIL_A9260, "CALAO QIL_A9260") /* Maintainer: calao-systems */ - .boot_params = AT91_SDRAM_BASE + 0x100, .timer = &at91sam926x_timer, - .map_io = ek_map_io, + .map_io = at91sam9260_map_io, + .init_early = ek_init_early, .init_irq = ek_init_irq, .init_machine = ek_board_init, MACHINE_END diff --git a/arch/arm/mach-at91/board-rm9200dk.c b/arch/arm/mach-at91/board-rm9200dk.c index 4c1047c8200d..aef9627710b0 100644 --- a/arch/arm/mach-at91/board-rm9200dk.c +++ b/arch/arm/mach-at91/board-rm9200dk.c @@ -45,10 +45,10 @@ #include "generic.h" -static void __init dk_map_io(void) +static void __init dk_init_early(void) { /* Initialize processor: 18.432 MHz crystal */ - at91rm9200_initialize(18432000, AT91RM9200_BGA); + at91rm9200_initialize(18432000); /* Setup the LEDs */ at91_init_leds(AT91_PIN_PB2, AT91_PIN_PB2); @@ -227,9 +227,9 @@ static void __init dk_board_init(void) MACHINE_START(AT91RM9200DK, "Atmel AT91RM9200-DK") /* Maintainer: SAN People/Atmel */ - .boot_params = AT91_SDRAM_BASE + 0x100, .timer = &at91rm9200_timer, - .map_io = dk_map_io, + .map_io = at91rm9200_map_io, + .init_early = dk_init_early, .init_irq = dk_init_irq, .init_machine = dk_board_init, MACHINE_END diff --git a/arch/arm/mach-at91/board-rm9200ek.c b/arch/arm/mach-at91/board-rm9200ek.c index 9df1be8818c0..015a02183080 100644 --- a/arch/arm/mach-at91/board-rm9200ek.c +++ b/arch/arm/mach-at91/board-rm9200ek.c @@ -45,10 +45,10 @@ #include "generic.h" -static void __init ek_map_io(void) +static void __init ek_init_early(void) { /* Initialize processor: 18.432 MHz crystal */ - at91rm9200_initialize(18432000, AT91RM9200_BGA); + at91rm9200_initialize(18432000); /* Setup the LEDs */ at91_init_leds(AT91_PIN_PB1, AT91_PIN_PB2); @@ -193,9 +193,9 @@ static void __init ek_board_init(void) MACHINE_START(AT91RM9200EK, "Atmel AT91RM9200-EK") /* Maintainer: SAN People/Atmel */ - .boot_params = AT91_SDRAM_BASE + 0x100, .timer = &at91rm9200_timer, - .map_io = ek_map_io, + .map_io = at91rm9200_map_io, + .init_early = ek_init_early, .init_irq = ek_init_irq, .init_machine = ek_board_init, MACHINE_END diff --git a/arch/arm/mach-at91/board-sam9-l9260.c b/arch/arm/mach-at91/board-sam9-l9260.c index 25a26beaa728..aaf1bf0989b3 100644 --- a/arch/arm/mach-at91/board-sam9-l9260.c +++ b/arch/arm/mach-at91/board-sam9-l9260.c @@ -44,7 +44,7 @@ #include "generic.h" -static void __init ek_map_io(void) +static void __init ek_init_early(void) { /* Initialize processor: 18.432 MHz crystal */ at91sam9260_initialize(18432000); @@ -212,9 +212,9 @@ static void __init ek_board_init(void) MACHINE_START(SAM9_L9260, "Olimex SAM9-L9260") /* Maintainer: Olimex */ - .boot_params = AT91_SDRAM_BASE + 0x100, .timer = &at91sam926x_timer, - .map_io = ek_map_io, + .map_io = at91sam9260_map_io, + .init_early = ek_init_early, .init_irq = ek_init_irq, .init_machine = ek_board_init, MACHINE_END diff --git a/arch/arm/mach-at91/board-sam9260ek.c b/arch/arm/mach-at91/board-sam9260ek.c index de1816e0e1d9..d600dc123227 100644 --- a/arch/arm/mach-at91/board-sam9260ek.c +++ b/arch/arm/mach-at91/board-sam9260ek.c @@ -44,12 +44,13 @@ #include #include #include +#include #include "sam9_smc.h" #include "generic.h" -static void __init ek_map_io(void) +static void __init ek_init_early(void) { /* Initialize processor: 18.432 MHz crystal */ at91sam9260_initialize(18432000); @@ -191,11 +192,6 @@ static struct atmel_nand_data __initdata ek_nand_data = { .rdy_pin = AT91_PIN_PC13, .enable_pin = AT91_PIN_PC14, .partition_info = nand_partitions, -#if defined(CONFIG_MTD_NAND_ATMEL_BUSWIDTH_16) - .bus_width_16 = 1, -#else - .bus_width_16 = 0, -#endif }; static struct sam9_smc_config __initdata ek_nand_smc_config = { @@ -218,6 +214,7 @@ static struct sam9_smc_config __initdata ek_nand_smc_config = { static void __init ek_add_device_nand(void) { + ek_nand_data.bus_width_16 = !board_have_nand_8bit(); /* setup bus-width (8 or 16) */ if (ek_nand_data.bus_width_16) ek_nand_smc_config.mode |= AT91_SMC_DBW_16; @@ -356,9 +353,9 @@ static void __init ek_board_init(void) MACHINE_START(AT91SAM9260EK, "Atmel AT91SAM9260-EK") /* Maintainer: Atmel */ - .boot_params = AT91_SDRAM_BASE + 0x100, .timer = &at91sam926x_timer, - .map_io = ek_map_io, + .map_io = at91sam9260_map_io, + .init_early = ek_init_early, .init_irq = ek_init_irq, .init_machine = ek_board_init, MACHINE_END diff --git a/arch/arm/mach-at91/board-sam9261ek.c b/arch/arm/mach-at91/board-sam9261ek.c index 14acc901e24c..f897f84d43dc 100644 --- a/arch/arm/mach-at91/board-sam9261ek.c +++ b/arch/arm/mach-at91/board-sam9261ek.c @@ -48,12 +48,13 @@ #include #include #include +#include #include "sam9_smc.h" #include "generic.h" -static void __init ek_map_io(void) +static void __init ek_init_early(void) { /* Initialize processor: 18.432 MHz crystal */ at91sam9261_initialize(18432000); @@ -197,11 +198,6 @@ static struct atmel_nand_data __initdata ek_nand_data = { .rdy_pin = AT91_PIN_PC15, .enable_pin = AT91_PIN_PC14, .partition_info = nand_partitions, -#if defined(CONFIG_MTD_NAND_ATMEL_BUSWIDTH_16) - .bus_width_16 = 1, -#else - .bus_width_16 = 0, -#endif }; static struct sam9_smc_config __initdata ek_nand_smc_config = { @@ -224,6 +220,7 @@ static struct sam9_smc_config __initdata ek_nand_smc_config = { static void __init ek_add_device_nand(void) { + ek_nand_data.bus_width_16 = !board_have_nand_8bit(); /* setup bus-width (8 or 16) */ if (ek_nand_data.bus_width_16) ek_nand_smc_config.mode |= AT91_SMC_DBW_16; @@ -623,9 +620,9 @@ MACHINE_START(AT91SAM9261EK, "Atmel AT91SAM9261-EK") MACHINE_START(AT91SAM9G10EK, "Atmel AT91SAM9G10-EK") #endif /* Maintainer: Atmel */ - .boot_params = AT91_SDRAM_BASE + 0x100, .timer = &at91sam926x_timer, - .map_io = ek_map_io, + .map_io = at91sam9261_map_io, + .init_early = ek_init_early, .init_irq = ek_init_irq, .init_machine = ek_board_init, MACHINE_END diff --git a/arch/arm/mach-at91/board-sam9263ek.c b/arch/arm/mach-at91/board-sam9263ek.c index bfe490df58be..605b26f40a4c 100644 --- a/arch/arm/mach-at91/board-sam9263ek.c +++ b/arch/arm/mach-at91/board-sam9263ek.c @@ -47,12 +47,13 @@ #include #include #include +#include #include "sam9_smc.h" #include "generic.h" -static void __init ek_map_io(void) +static void __init ek_init_early(void) { /* Initialize processor: 16.367 MHz crystal */ at91sam9263_initialize(16367660); @@ -198,11 +199,6 @@ static struct atmel_nand_data __initdata ek_nand_data = { .rdy_pin = AT91_PIN_PA22, .enable_pin = AT91_PIN_PD15, .partition_info = nand_partitions, -#if defined(CONFIG_MTD_NAND_ATMEL_BUSWIDTH_16) - .bus_width_16 = 1, -#else - .bus_width_16 = 0, -#endif }; static struct sam9_smc_config __initdata ek_nand_smc_config = { @@ -225,6 +221,7 @@ static struct sam9_smc_config __initdata ek_nand_smc_config = { static void __init ek_add_device_nand(void) { + ek_nand_data.bus_width_16 = !board_have_nand_8bit(); /* setup bus-width (8 or 16) */ if (ek_nand_data.bus_width_16) ek_nand_smc_config.mode |= AT91_SMC_DBW_16; @@ -454,9 +451,9 @@ static void __init ek_board_init(void) MACHINE_START(AT91SAM9263EK, "Atmel AT91SAM9263-EK") /* Maintainer: Atmel */ - .boot_params = AT91_SDRAM_BASE + 0x100, .timer = &at91sam926x_timer, - .map_io = ek_map_io, + .map_io = at91sam9263_map_io, + .init_early = ek_init_early, .init_irq = ek_init_irq, .init_machine = ek_board_init, MACHINE_END diff --git a/arch/arm/mach-at91/board-sam9g20ek.c b/arch/arm/mach-at91/board-sam9g20ek.c index ca8198b3c168..7624cf0d006b 100644 --- a/arch/arm/mach-at91/board-sam9g20ek.c +++ b/arch/arm/mach-at91/board-sam9g20ek.c @@ -43,6 +43,7 @@ #include #include #include +#include #include "sam9_smc.h" #include "generic.h" @@ -60,7 +61,7 @@ static int inline ek_have_2mmc(void) } -static void __init ek_map_io(void) +static void __init ek_init_early(void) { /* Initialize processor: 18.432 MHz crystal */ at91sam9260_initialize(18432000); @@ -175,11 +176,6 @@ static struct atmel_nand_data __initdata ek_nand_data = { .rdy_pin = AT91_PIN_PC13, .enable_pin = AT91_PIN_PC14, .partition_info = nand_partitions, -#if defined(CONFIG_MTD_NAND_ATMEL_BUSWIDTH_16) - .bus_width_16 = 1, -#else - .bus_width_16 = 0, -#endif }; static struct sam9_smc_config __initdata ek_nand_smc_config = { @@ -202,6 +198,7 @@ static struct sam9_smc_config __initdata ek_nand_smc_config = { static void __init ek_add_device_nand(void) { + ek_nand_data.bus_width_16 = !board_have_nand_8bit(); /* setup bus-width (8 or 16) */ if (ek_nand_data.bus_width_16) ek_nand_smc_config.mode |= AT91_SMC_DBW_16; @@ -406,18 +403,18 @@ static void __init ek_board_init(void) MACHINE_START(AT91SAM9G20EK, "Atmel AT91SAM9G20-EK") /* Maintainer: Atmel */ - .boot_params = AT91_SDRAM_BASE + 0x100, .timer = &at91sam926x_timer, - .map_io = ek_map_io, + .map_io = at91sam9260_map_io, + .init_early = ek_init_early, .init_irq = ek_init_irq, .init_machine = ek_board_init, MACHINE_END MACHINE_START(AT91SAM9G20EK_2MMC, "Atmel AT91SAM9G20-EK 2 MMC Slot Mod") /* Maintainer: Atmel */ - .boot_params = AT91_SDRAM_BASE + 0x100, .timer = &at91sam926x_timer, - .map_io = ek_map_io, + .map_io = at91sam9260_map_io, + .init_early = ek_init_early, .init_irq = ek_init_irq, .init_machine = ek_board_init, MACHINE_END diff --git a/arch/arm/mach-at91/board-sam9m10g45ek.c b/arch/arm/mach-at91/board-sam9m10g45ek.c index 6c999dbd2bcf..063c95d0e8f0 100644 --- a/arch/arm/mach-at91/board-sam9m10g45ek.c +++ b/arch/arm/mach-at91/board-sam9m10g45ek.c @@ -41,12 +41,13 @@ #include #include #include +#include #include "sam9_smc.h" #include "generic.h" -static void __init ek_map_io(void) +static void __init ek_init_early(void) { /* Initialize processor: 12.000 MHz crystal */ at91sam9g45_initialize(12000000); @@ -155,11 +156,6 @@ static struct atmel_nand_data __initdata ek_nand_data = { .rdy_pin = AT91_PIN_PC8, .enable_pin = AT91_PIN_PC14, .partition_info = nand_partitions, -#if defined(CONFIG_MTD_NAND_ATMEL_BUSWIDTH_16) - .bus_width_16 = 1, -#else - .bus_width_16 = 0, -#endif }; static struct sam9_smc_config __initdata ek_nand_smc_config = { @@ -182,6 +178,7 @@ static struct sam9_smc_config __initdata ek_nand_smc_config = { static void __init ek_add_device_nand(void) { + ek_nand_data.bus_width_16 = !board_have_nand_8bit(); /* setup bus-width (8 or 16) */ if (ek_nand_data.bus_width_16) ek_nand_smc_config.mode |= AT91_SMC_DBW_16; @@ -424,9 +421,9 @@ static void __init ek_board_init(void) MACHINE_START(AT91SAM9M10G45EK, "Atmel AT91SAM9M10G45-EK") /* Maintainer: Atmel */ - .boot_params = AT91_SDRAM_BASE + 0x100, .timer = &at91sam926x_timer, - .map_io = ek_map_io, + .map_io = at91sam9g45_map_io, + .init_early = ek_init_early, .init_irq = ek_init_irq, .init_machine = ek_board_init, MACHINE_END diff --git a/arch/arm/mach-at91/board-sam9rlek.c b/arch/arm/mach-at91/board-sam9rlek.c index 3bf3408e94c1..effb399a80a6 100644 --- a/arch/arm/mach-at91/board-sam9rlek.c +++ b/arch/arm/mach-at91/board-sam9rlek.c @@ -38,7 +38,7 @@ #include "generic.h" -static void __init ek_map_io(void) +static void __init ek_init_early(void) { /* Initialize processor: 12.000 MHz crystal */ at91sam9rl_initialize(12000000); @@ -329,9 +329,9 @@ static void __init ek_board_init(void) MACHINE_START(AT91SAM9RLEK, "Atmel AT91SAM9RL-EK") /* Maintainer: Atmel */ - .boot_params = AT91_SDRAM_BASE + 0x100, .timer = &at91sam926x_timer, - .map_io = ek_map_io, + .map_io = at91sam9rl_map_io, + .init_early = ek_init_early, .init_irq = ek_init_irq, .init_machine = ek_board_init, MACHINE_END diff --git a/arch/arm/mach-at91/board-snapper9260.c b/arch/arm/mach-at91/board-snapper9260.c index 17f7d9b32142..3eb0a1153cc8 100644 --- a/arch/arm/mach-at91/board-snapper9260.c +++ b/arch/arm/mach-at91/board-snapper9260.c @@ -40,7 +40,7 @@ #define SNAPPER9260_IO_EXP_GPIO(x) (NR_BUILTIN_GPIO + (x)) -static void __init snapper9260_map_io(void) +static void __init snapper9260_init_early(void) { at91sam9260_initialize(18432000); @@ -178,9 +178,9 @@ static void __init snapper9260_board_init(void) } MACHINE_START(SNAPPER_9260, "Bluewater Systems Snapper 9260/9G20 module") - .boot_params = AT91_SDRAM_BASE + 0x100, .timer = &at91sam926x_timer, - .map_io = snapper9260_map_io, + .map_io = at91sam9260_map_io, + .init_early = snapper9260_init_early, .init_irq = snapper9260_init_irq, .init_machine = snapper9260_board_init, MACHINE_END diff --git a/arch/arm/mach-at91/board-stamp9g20.c b/arch/arm/mach-at91/board-stamp9g20.c index f8902b118960..5e5c85688f5f 100644 --- a/arch/arm/mach-at91/board-stamp9g20.c +++ b/arch/arm/mach-at91/board-stamp9g20.c @@ -32,7 +32,7 @@ #include "generic.h" -void __init stamp9g20_map_io(void) +void __init stamp9g20_init_early(void) { /* Initialize processor: 18.432 MHz crystal */ at91sam9260_initialize(18432000); @@ -44,9 +44,9 @@ void __init stamp9g20_map_io(void) at91_set_serial_console(0); } -static void __init stamp9g20evb_map_io(void) +static void __init stamp9g20evb_init_early(void) { - stamp9g20_map_io(); + stamp9g20_init_early(); /* USART0 on ttyS1. (Rx, Tx, CTS, RTS, DTR, DSR, DCD, RI) */ at91_register_uart(AT91SAM9260_ID_US0, 1, ATMEL_UART_CTS | ATMEL_UART_RTS @@ -54,9 +54,9 @@ static void __init stamp9g20evb_map_io(void) | ATMEL_UART_DCD | ATMEL_UART_RI); } -static void __init portuxg20_map_io(void) +static void __init portuxg20_init_early(void) { - stamp9g20_map_io(); + stamp9g20_init_early(); /* USART0 on ttyS1. (Rx, Tx, CTS, RTS, DTR, DSR, DCD, RI) */ at91_register_uart(AT91SAM9260_ID_US0, 1, ATMEL_UART_CTS | ATMEL_UART_RTS @@ -298,18 +298,18 @@ static void __init stamp9g20evb_board_init(void) MACHINE_START(PORTUXG20, "taskit PortuxG20") /* Maintainer: taskit GmbH */ - .boot_params = AT91_SDRAM_BASE + 0x100, .timer = &at91sam926x_timer, - .map_io = portuxg20_map_io, + .map_io = at91sam9260_map_io, + .init_early = portuxg20_init_early, .init_irq = init_irq, .init_machine = portuxg20_board_init, MACHINE_END MACHINE_START(STAMP9G20, "taskit Stamp9G20") /* Maintainer: taskit GmbH */ - .boot_params = AT91_SDRAM_BASE + 0x100, .timer = &at91sam926x_timer, - .map_io = stamp9g20evb_map_io, + .map_io = at91sam9260_map_io, + .init_early = stamp9g20evb_init_early, .init_irq = init_irq, .init_machine = stamp9g20evb_board_init, MACHINE_END diff --git a/arch/arm/mach-at91/board-usb-a9260.c b/arch/arm/mach-at91/board-usb-a9260.c index 07784baeae84..0e784e6fedec 100644 --- a/arch/arm/mach-at91/board-usb-a9260.c +++ b/arch/arm/mach-at91/board-usb-a9260.c @@ -48,7 +48,7 @@ #include "generic.h" -static void __init ek_map_io(void) +static void __init ek_init_early(void) { /* Initialize processor: 12.000 MHz crystal */ at91sam9260_initialize(12000000); @@ -228,9 +228,9 @@ static void __init ek_board_init(void) MACHINE_START(USB_A9260, "CALAO USB_A9260") /* Maintainer: calao-systems */ - .boot_params = AT91_SDRAM_BASE + 0x100, .timer = &at91sam926x_timer, - .map_io = ek_map_io, + .map_io = at91sam9260_map_io, + .init_early = ek_init_early, .init_irq = ek_init_irq, .init_machine = ek_board_init, MACHINE_END diff --git a/arch/arm/mach-at91/board-usb-a9263.c b/arch/arm/mach-at91/board-usb-a9263.c index b614508931fd..cf626dd14b2c 100644 --- a/arch/arm/mach-at91/board-usb-a9263.c +++ b/arch/arm/mach-at91/board-usb-a9263.c @@ -47,7 +47,7 @@ #include "generic.h" -static void __init ek_map_io(void) +static void __init ek_init_early(void) { /* Initialize processor: 12.00 MHz crystal */ at91sam9263_initialize(12000000); @@ -244,9 +244,9 @@ static void __init ek_board_init(void) MACHINE_START(USB_A9263, "CALAO USB_A9263") /* Maintainer: calao-systems */ - .boot_params = AT91_SDRAM_BASE + 0x100, .timer = &at91sam926x_timer, - .map_io = ek_map_io, + .map_io = at91sam9263_map_io, + .init_early = ek_init_early, .init_irq = ek_init_irq, .init_machine = ek_board_init, MACHINE_END diff --git a/arch/arm/mach-at91/board-yl-9200.c b/arch/arm/mach-at91/board-yl-9200.c index e0f0080eb639..c208cc334d7d 100644 --- a/arch/arm/mach-at91/board-yl-9200.c +++ b/arch/arm/mach-at91/board-yl-9200.c @@ -45,14 +45,18 @@ #include #include #include +#include #include "generic.h" -static void __init yl9200_map_io(void) +static void __init yl9200_init_early(void) { + /* Set cpu type: PQFP */ + at91rm9200_set_type(ARCH_REVISON_9200_PQFP); + /* Initialize processor: 18.432 MHz crystal */ - at91rm9200_initialize(18432000, AT91RM9200_PQFP); + at91rm9200_initialize(18432000); /* Setup the LEDs D2=PB17 (timer), D3=PB16 (cpu) */ at91_init_leds(AT91_PIN_PB16, AT91_PIN_PB17); @@ -594,9 +598,9 @@ static void __init yl9200_board_init(void) MACHINE_START(YL9200, "uCdragon YL-9200") /* Maintainer: S.Birtles */ - .boot_params = AT91_SDRAM_BASE + 0x100, .timer = &at91rm9200_timer, - .map_io = yl9200_map_io, + .map_io = at91rm9200_map_io, + .init_early = yl9200_init_early, .init_irq = yl9200_init_irq, .init_machine = yl9200_board_init, MACHINE_END diff --git a/arch/arm/mach-at91/clock.c b/arch/arm/mach-at91/clock.c index 9113da6845f1..61873f3aa92d 100644 --- a/arch/arm/mach-at91/clock.c +++ b/arch/arm/mach-at91/clock.c @@ -163,7 +163,7 @@ static struct clk udpck = { .parent = &pllb, .mode = pmc_sys_mode, }; -static struct clk utmi_clk = { +struct clk utmi_clk = { .name = "utmi_clk", .parent = &main_clk, .pmc_mask = AT91_PMC_UPLLEN, /* in CKGR_UCKR */ @@ -182,7 +182,7 @@ static struct clk uhpck = { * memory, interfaces to on-chip peripherals, the AIC, and sometimes more * (e.g baud rate generation). It's sourced from one of the primary clocks. */ -static struct clk mck = { +struct clk mck = { .name = "mck", .pmc_mask = AT91_PMC_MCKRDY, /* in PMC_SR */ }; @@ -215,43 +215,6 @@ static struct clk __init *at91_css_to_clk(unsigned long css) return NULL; } -/* - * Associate a particular clock with a function (eg, "uart") and device. - * The drivers can then request the same 'function' with several different - * devices and not care about which clock name to use. - */ -void __init at91_clock_associate(const char *id, struct device *dev, const char *func) -{ - struct clk *clk = clk_get(NULL, id); - - if (!dev || !clk || !IS_ERR(clk_get(dev, func))) - return; - - clk->function = func; - clk->dev = dev; -} - -/* clocks cannot be de-registered no refcounting necessary */ -struct clk *clk_get(struct device *dev, const char *id) -{ - struct clk *clk; - - list_for_each_entry(clk, &clocks, node) { - if (strcmp(id, clk->name) == 0) - return clk; - if (clk->function && (dev == clk->dev) && strcmp(id, clk->function) == 0) - return clk; - } - - return ERR_PTR(-ENOENT); -} -EXPORT_SYMBOL(clk_get); - -void clk_put(struct clk *clk) -{ -} -EXPORT_SYMBOL(clk_put); - static void __clk_enable(struct clk *clk) { if (clk->parent) @@ -498,32 +461,38 @@ postcore_initcall(at91_clk_debugfs_init); /*------------------------------------------------------------------------*/ /* Register a new clock */ +static void __init at91_clk_add(struct clk *clk) +{ + list_add_tail(&clk->node, &clocks); + + clk->cl.con_id = clk->name; + clk->cl.clk = clk; + clkdev_add(&clk->cl); +} + int __init clk_register(struct clk *clk) { if (clk_is_peripheral(clk)) { if (!clk->parent) clk->parent = &mck; clk->mode = pmc_periph_mode; - list_add_tail(&clk->node, &clocks); } else if (clk_is_sys(clk)) { clk->parent = &mck; clk->mode = pmc_sys_mode; - - list_add_tail(&clk->node, &clocks); } #ifdef CONFIG_AT91_PROGRAMMABLE_CLOCKS else if (clk_is_programmable(clk)) { clk->mode = pmc_sys_mode; init_programmable_clock(clk); - list_add_tail(&clk->node, &clocks); } #endif + at91_clk_add(clk); + return 0; } - /*------------------------------------------------------------------------*/ static u32 __init at91_pll_rate(struct clk *pll, u32 freq, u32 reg) @@ -630,7 +599,7 @@ static void __init at91_pllb_usbfs_clock_init(unsigned long main_clock) at91_sys_write(AT91_PMC_SCER, AT91RM9200_PMC_MCKUDP); } else if (cpu_is_at91sam9260() || cpu_is_at91sam9261() || cpu_is_at91sam9263() || cpu_is_at91sam9g20() || - cpu_is_at91sam9g10() || cpu_is_at572d940hf()) { + cpu_is_at91sam9g10()) { uhpck.pmc_mask = AT91SAM926x_PMC_UHP; udpck.pmc_mask = AT91SAM926x_PMC_UDP; } else if (cpu_is_at91cap9()) { @@ -754,19 +723,19 @@ int __init at91_clock_init(unsigned long main_clock) /* Register the PMC's standard clocks */ for (i = 0; i < ARRAY_SIZE(standard_pmc_clocks); i++) - list_add_tail(&standard_pmc_clocks[i]->node, &clocks); + at91_clk_add(standard_pmc_clocks[i]); if (cpu_has_pllb()) - list_add_tail(&pllb.node, &clocks); + at91_clk_add(&pllb); if (cpu_has_uhp()) - list_add_tail(&uhpck.node, &clocks); + at91_clk_add(&uhpck); if (cpu_has_udpfs()) - list_add_tail(&udpck.node, &clocks); + at91_clk_add(&udpck); if (cpu_has_utmi()) - list_add_tail(&utmi_clk.node, &clocks); + at91_clk_add(&utmi_clk); /* MCK and CPU clock are "always on" */ clk_enable(&mck); diff --git a/arch/arm/mach-at91/clock.h b/arch/arm/mach-at91/clock.h index 6cf4b78e175d..c2e63e47dcbe 100644 --- a/arch/arm/mach-at91/clock.h +++ b/arch/arm/mach-at91/clock.h @@ -6,6 +6,8 @@ * published by the Free Software Foundation. */ +#include + #define CLK_TYPE_PRIMARY 0x1 #define CLK_TYPE_PLL 0x2 #define CLK_TYPE_PROGRAMMABLE 0x4 @@ -16,8 +18,7 @@ struct clk { struct list_head node; const char *name; /* unique clock name */ - const char *function; /* function of the clock */ - struct device *dev; /* device associated with function */ + struct clk_lookup cl; unsigned long rate_hz; struct clk *parent; u32 pmc_mask; @@ -29,3 +30,18 @@ struct clk { extern int __init clk_register(struct clk *clk); +extern struct clk mck; +extern struct clk utmi_clk; + +#define CLKDEV_CON_ID(_id, _clk) \ + { \ + .con_id = _id, \ + .clk = _clk, \ + } + +#define CLKDEV_CON_DEV_ID(_con_id, _dev_id, _clk) \ + { \ + .con_id = _con_id, \ + .dev_id = _dev_id, \ + .clk = _clk, \ + } diff --git a/arch/arm/mach-at91/generic.h b/arch/arm/mach-at91/generic.h index 0c66deb2db39..8ff3418f3430 100644 --- a/arch/arm/mach-at91/generic.h +++ b/arch/arm/mach-at91/generic.h @@ -8,8 +8,21 @@ * published by the Free Software Foundation. */ +#include + + /* Map io */ +extern void __init at91rm9200_map_io(void); +extern void __init at91sam9260_map_io(void); +extern void __init at91sam9261_map_io(void); +extern void __init at91sam9263_map_io(void); +extern void __init at91sam9rl_map_io(void); +extern void __init at91sam9g45_map_io(void); +extern void __init at91x40_map_io(void); +extern void __init at91cap9_map_io(void); + /* Processors */ -extern void __init at91rm9200_initialize(unsigned long main_clock, unsigned short banks); +extern void __init at91rm9200_set_type(int type); +extern void __init at91rm9200_initialize(unsigned long main_clock); extern void __init at91sam9260_initialize(unsigned long main_clock); extern void __init at91sam9261_initialize(unsigned long main_clock); extern void __init at91sam9263_initialize(unsigned long main_clock); @@ -17,7 +30,6 @@ extern void __init at91sam9rl_initialize(unsigned long main_clock); extern void __init at91sam9g45_initialize(unsigned long main_clock); extern void __init at91x40_initialize(unsigned long main_clock); extern void __init at91cap9_initialize(unsigned long main_clock); -extern void __init at572d940hf_initialize(unsigned long main_clock); /* Interrupts */ extern void __init at91rm9200_init_interrupts(unsigned int priority[]); @@ -28,7 +40,6 @@ extern void __init at91sam9rl_init_interrupts(unsigned int priority[]); extern void __init at91sam9g45_init_interrupts(unsigned int priority[]); extern void __init at91x40_init_interrupts(unsigned int priority[]); extern void __init at91cap9_init_interrupts(unsigned int priority[]); -extern void __init at572d940hf_init_interrupts(unsigned int priority[]); extern void __init at91_aic_init(unsigned int priority[]); /* Timer */ @@ -39,8 +50,19 @@ extern struct sys_timer at91x40_timer; /* Clocks */ extern int __init at91_clock_init(unsigned long main_clock); +/* + * function to specify the clock of the default console. As we do not + * use the device/driver bus, the dev_name is not intialize. So we need + * to link the clock to a specific con_id only "usart" + */ +extern void __init at91rm9200_set_console_clock(int id); +extern void __init at91sam9260_set_console_clock(int id); +extern void __init at91sam9261_set_console_clock(int id); +extern void __init at91sam9263_set_console_clock(int id); +extern void __init at91sam9rl_set_console_clock(int id); +extern void __init at91sam9g45_set_console_clock(int id); +extern void __init at91cap9_set_console_clock(int id); struct device; -extern void __init at91_clock_associate(const char *id, struct device *dev, const char *func); /* Power Management */ extern void at91_irq_suspend(void); diff --git a/arch/arm/mach-at91/include/mach/at572d940hf.h b/arch/arm/mach-at91/include/mach/at572d940hf.h deleted file mode 100644 index be510cfc56be..000000000000 --- a/arch/arm/mach-at91/include/mach/at572d940hf.h +++ /dev/null @@ -1,123 +0,0 @@ -/* - * include/mach/at572d940hf.h - * - * Antonio R. Costa - * Copyright (C) 2008 Atmel - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#ifndef AT572D940HF_H -#define AT572D940HF_H - -/* - * Peripheral identifiers/interrupts. - */ -#define AT91_ID_FIQ 0 /* Advanced Interrupt Controller (FIQ) */ -#define AT91_ID_SYS 1 /* System Peripherals */ -#define AT572D940HF_ID_PIOA 2 /* Parallel IO Controller A */ -#define AT572D940HF_ID_PIOB 3 /* Parallel IO Controller B */ -#define AT572D940HF_ID_PIOC 4 /* Parallel IO Controller C */ -#define AT572D940HF_ID_EMAC 5 /* MACB ethernet controller */ -#define AT572D940HF_ID_US0 6 /* USART 0 */ -#define AT572D940HF_ID_US1 7 /* USART 1 */ -#define AT572D940HF_ID_US2 8 /* USART 2 */ -#define AT572D940HF_ID_MCI 9 /* Multimedia Card Interface */ -#define AT572D940HF_ID_UDP 10 /* USB Device Port */ -#define AT572D940HF_ID_TWI0 11 /* Two-Wire Interface 0 */ -#define AT572D940HF_ID_SPI0 12 /* Serial Peripheral Interface 0 */ -#define AT572D940HF_ID_SPI1 13 /* Serial Peripheral Interface 1 */ -#define AT572D940HF_ID_SSC0 14 /* Serial Synchronous Controller 0 */ -#define AT572D940HF_ID_SSC1 15 /* Serial Synchronous Controller 1 */ -#define AT572D940HF_ID_SSC2 16 /* Serial Synchronous Controller 2 */ -#define AT572D940HF_ID_TC0 17 /* Timer Counter 0 */ -#define AT572D940HF_ID_TC1 18 /* Timer Counter 1 */ -#define AT572D940HF_ID_TC2 19 /* Timer Counter 2 */ -#define AT572D940HF_ID_UHP 20 /* USB Host port */ -#define AT572D940HF_ID_SSC3 21 /* Serial Synchronous Controller 3 */ -#define AT572D940HF_ID_TWI1 22 /* Two-Wire Interface 1 */ -#define AT572D940HF_ID_CAN0 23 /* CAN Controller 0 */ -#define AT572D940HF_ID_CAN1 24 /* CAN Controller 1 */ -#define AT572D940HF_ID_MHALT 25 /* mAgicV HALT line */ -#define AT572D940HF_ID_MSIRQ0 26 /* mAgicV SIRQ0 line */ -#define AT572D940HF_ID_MEXC 27 /* mAgicV exception line */ -#define AT572D940HF_ID_MEDMA 28 /* mAgicV end of DMA line */ -#define AT572D940HF_ID_IRQ0 29 /* External Interrupt Source (IRQ0) */ -#define AT572D940HF_ID_IRQ1 30 /* External Interrupt Source (IRQ1) */ -#define AT572D940HF_ID_IRQ2 31 /* External Interrupt Source (IRQ2) */ - - -/* - * User Peripheral physical base addresses. - */ -#define AT572D940HF_BASE_TCB 0xfffa0000 -#define AT572D940HF_BASE_TC0 0xfffa0000 -#define AT572D940HF_BASE_TC1 0xfffa0040 -#define AT572D940HF_BASE_TC2 0xfffa0080 -#define AT572D940HF_BASE_UDP 0xfffa4000 -#define AT572D940HF_BASE_MCI 0xfffa8000 -#define AT572D940HF_BASE_TWI0 0xfffac000 -#define AT572D940HF_BASE_US0 0xfffb0000 -#define AT572D940HF_BASE_US1 0xfffb4000 -#define AT572D940HF_BASE_US2 0xfffb8000 -#define AT572D940HF_BASE_SSC0 0xfffbc000 -#define AT572D940HF_BASE_SSC1 0xfffc0000 -#define AT572D940HF_BASE_SSC2 0xfffc4000 -#define AT572D940HF_BASE_SPI0 0xfffc8000 -#define AT572D940HF_BASE_SPI1 0xfffcc000 -#define AT572D940HF_BASE_SSC3 0xfffd0000 -#define AT572D940HF_BASE_TWI1 0xfffd4000 -#define AT572D940HF_BASE_EMAC 0xfffd8000 -#define AT572D940HF_BASE_CAN0 0xfffdc000 -#define AT572D940HF_BASE_CAN1 0xfffe0000 -#define AT91_BASE_SYS 0xffffea00 - - -/* - * System Peripherals (offset from AT91_BASE_SYS) - */ -#define AT91_SDRAMC0 (0xffffea00 - AT91_BASE_SYS) -#define AT91_SMC (0xffffec00 - AT91_BASE_SYS) -#define AT91_MATRIX (0xffffee00 - AT91_BASE_SYS) -#define AT91_AIC (0xfffff000 - AT91_BASE_SYS) -#define AT91_DBGU (0xfffff200 - AT91_BASE_SYS) -#define AT91_PIOA (0xfffff400 - AT91_BASE_SYS) -#define AT91_PIOB (0xfffff600 - AT91_BASE_SYS) -#define AT91_PIOC (0xfffff800 - AT91_BASE_SYS) -#define AT91_PMC (0xfffffc00 - AT91_BASE_SYS) -#define AT91_RSTC (0xfffffd00 - AT91_BASE_SYS) -#define AT91_RTT (0xfffffd20 - AT91_BASE_SYS) -#define AT91_PIT (0xfffffd30 - AT91_BASE_SYS) -#define AT91_WDT (0xfffffd40 - AT91_BASE_SYS) - -#define AT91_USART0 AT572D940HF_ID_US0 -#define AT91_USART1 AT572D940HF_ID_US1 -#define AT91_USART2 AT572D940HF_ID_US2 - - -/* - * Internal Memory. - */ -#define AT572D940HF_SRAM_BASE 0x00300000 /* Internal SRAM base address */ -#define AT572D940HF_SRAM_SIZE (48 * SZ_1K) /* Internal SRAM size (48Kb) */ - -#define AT572D940HF_ROM_BASE 0x00400000 /* Internal ROM base address */ -#define AT572D940HF_ROM_SIZE SZ_32K /* Internal ROM size (32Kb) */ - -#define AT572D940HF_UHP_BASE 0x00500000 /* USB Host controller */ - - -#endif diff --git a/arch/arm/mach-at91/include/mach/at572d940hf_matrix.h b/arch/arm/mach-at91/include/mach/at572d940hf_matrix.h deleted file mode 100644 index b6751df09488..000000000000 --- a/arch/arm/mach-at91/include/mach/at572d940hf_matrix.h +++ /dev/null @@ -1,123 +0,0 @@ -/* - * include/mach//at572d940hf_matrix.h - * - * Antonio R. Costa - * Copyright (C) 2008 Atmel - * - * Copyright (C) 2005 SAN People - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef AT572D940HF_MATRIX_H -#define AT572D940HF_MATRIX_H - -#define AT91_MATRIX_MCFG0 (AT91_MATRIX + 0x00) /* Master Configuration Register 0 */ -#define AT91_MATRIX_MCFG1 (AT91_MATRIX + 0x04) /* Master Configuration Register 1 */ -#define AT91_MATRIX_MCFG2 (AT91_MATRIX + 0x08) /* Master Configuration Register 2 */ -#define AT91_MATRIX_MCFG3 (AT91_MATRIX + 0x0C) /* Master Configuration Register 3 */ -#define AT91_MATRIX_MCFG4 (AT91_MATRIX + 0x10) /* Master Configuration Register 4 */ -#define AT91_MATRIX_MCFG5 (AT91_MATRIX + 0x14) /* Master Configuration Register 5 */ - -#define AT91_MATRIX_ULBT (7 << 0) /* Undefined Length Burst Type */ -#define AT91_MATRIX_ULBT_INFINITE (0 << 0) -#define AT91_MATRIX_ULBT_SINGLE (1 << 0) -#define AT91_MATRIX_ULBT_FOUR (2 << 0) -#define AT91_MATRIX_ULBT_EIGHT (3 << 0) -#define AT91_MATRIX_ULBT_SIXTEEN (4 << 0) - -#define AT91_MATRIX_SCFG0 (AT91_MATRIX + 0x40) /* Slave Configuration Register 0 */ -#define AT91_MATRIX_SCFG1 (AT91_MATRIX + 0x44) /* Slave Configuration Register 1 */ -#define AT91_MATRIX_SCFG2 (AT91_MATRIX + 0x48) /* Slave Configuration Register 2 */ -#define AT91_MATRIX_SCFG3 (AT91_MATRIX + 0x4C) /* Slave Configuration Register 3 */ -#define AT91_MATRIX_SCFG4 (AT91_MATRIX + 0x50) /* Slave Configuration Register 4 */ -#define AT91_MATRIX_SLOT_CYCLE (0xff << 0) /* Maximum Number of Allowed Cycles for a Burst */ -#define AT91_MATRIX_DEFMSTR_TYPE (3 << 16) /* Default Master Type */ -#define AT91_MATRIX_DEFMSTR_TYPE_NONE (0 << 16) -#define AT91_MATRIX_DEFMSTR_TYPE_LAST (1 << 16) -#define AT91_MATRIX_DEFMSTR_TYPE_FIXED (2 << 16) -#define AT91_MATRIX_FIXED_DEFMSTR (0x7 << 18) /* Fixed Index of Default Master */ -#define AT91_MATRIX_ARBT (3 << 24) /* Arbitration Type */ -#define AT91_MATRIX_ARBT_ROUND_ROBIN (0 << 24) -#define AT91_MATRIX_ARBT_FIXED_PRIORITY (1 << 24) - -#define AT91_MATRIX_PRAS0 (AT91_MATRIX + 0x80) /* Priority Register A for Slave 0 */ -#define AT91_MATRIX_PRAS1 (AT91_MATRIX + 0x88) /* Priority Register A for Slave 1 */ -#define AT91_MATRIX_PRAS2 (AT91_MATRIX + 0x90) /* Priority Register A for Slave 2 */ -#define AT91_MATRIX_PRAS3 (AT91_MATRIX + 0x98) /* Priority Register A for Slave 3 */ -#define AT91_MATRIX_PRAS4 (AT91_MATRIX + 0xA0) /* Priority Register A for Slave 4 */ - -#define AT91_MATRIX_M0PR (3 << 0) /* Master 0 Priority */ -#define AT91_MATRIX_M1PR (3 << 4) /* Master 1 Priority */ -#define AT91_MATRIX_M2PR (3 << 8) /* Master 2 Priority */ -#define AT91_MATRIX_M3PR (3 << 12) /* Master 3 Priority */ -#define AT91_MATRIX_M4PR (3 << 16) /* Master 4 Priority */ -#define AT91_MATRIX_M5PR (3 << 20) /* Master 5 Priority */ -#define AT91_MATRIX_M6PR (3 << 24) /* Master 6 Priority */ - -#define AT91_MATRIX_MRCR (AT91_MATRIX + 0x100) /* Master Remap Control Register */ -#define AT91_MATRIX_RCB0 (1 << 0) /* Remap Command for AHB Master 0 (ARM926EJ-S Instruction Master) */ -#define AT91_MATRIX_RCB1 (1 << 1) /* Remap Command for AHB Master 1 (ARM926EJ-S Data Master) */ - -#define AT91_MATRIX_SFR0 (AT91_MATRIX + 0x110) /* Special Function Register 0 */ -#define AT91_MATRIX_SFR1 (AT91_MATRIX + 0x114) /* Special Function Register 1 */ -#define AT91_MATRIX_SFR2 (AT91_MATRIX + 0x118) /* Special Function Register 2 */ -#define AT91_MATRIX_SFR3 (AT91_MATRIX + 0x11C) /* Special Function Register 3 */ -#define AT91_MATRIX_SFR4 (AT91_MATRIX + 0x120) /* Special Function Register 4 */ -#define AT91_MATRIX_SFR5 (AT91_MATRIX + 0x124) /* Special Function Register 5 */ -#define AT91_MATRIX_SFR6 (AT91_MATRIX + 0x128) /* Special Function Register 6 */ -#define AT91_MATRIX_SFR7 (AT91_MATRIX + 0x12C) /* Special Function Register 7 */ -#define AT91_MATRIX_SFR8 (AT91_MATRIX + 0x130) /* Special Function Register 8 */ -#define AT91_MATRIX_SFR9 (AT91_MATRIX + 0x134) /* Special Function Register 9 */ -#define AT91_MATRIX_SFR10 (AT91_MATRIX + 0x138) /* Special Function Register 10 */ -#define AT91_MATRIX_SFR11 (AT91_MATRIX + 0x13C) /* Special Function Register 11 */ -#define AT91_MATRIX_SFR12 (AT91_MATRIX + 0x140) /* Special Function Register 12 */ -#define AT91_MATRIX_SFR13 (AT91_MATRIX + 0x144) /* Special Function Register 13 */ -#define AT91_MATRIX_SFR14 (AT91_MATRIX + 0x148) /* Special Function Register 14 */ -#define AT91_MATRIX_SFR15 (AT91_MATRIX + 0x14C) /* Special Function Register 15 */ - - -/* - * The following registers / bits are not defined in the Datasheet (Revision A) - */ - -#define AT91_MATRIX_TCR (AT91_MATRIX + 0x100) /* TCM Configuration Register */ -#define AT91_MATRIX_ITCM_SIZE (0xf << 0) /* Size of ITCM enabled memory block */ -#define AT91_MATRIX_ITCM_0 (0 << 0) -#define AT91_MATRIX_ITCM_16 (5 << 0) -#define AT91_MATRIX_ITCM_32 (6 << 0) -#define AT91_MATRIX_ITCM_64 (7 << 0) -#define AT91_MATRIX_DTCM_SIZE (0xf << 4) /* Size of DTCM enabled memory block */ -#define AT91_MATRIX_DTCM_0 (0 << 4) -#define AT91_MATRIX_DTCM_16 (5 << 4) -#define AT91_MATRIX_DTCM_32 (6 << 4) -#define AT91_MATRIX_DTCM_64 (7 << 4) - -#define AT91_MATRIX_EBICSA (AT91_MATRIX + 0x11C) /* EBI Chip Select Assignment Register */ -#define AT91_MATRIX_CS1A (1 << 1) /* Chip Select 1 Assignment */ -#define AT91_MATRIX_CS1A_SMC (0 << 1) -#define AT91_MATRIX_CS1A_SDRAMC (1 << 1) -#define AT91_MATRIX_CS3A (1 << 3) /* Chip Select 3 Assignment */ -#define AT91_MATRIX_CS3A_SMC (0 << 3) -#define AT91_MATRIX_CS3A_SMC_SMARTMEDIA (1 << 3) -#define AT91_MATRIX_CS4A (1 << 4) /* Chip Select 4 Assignment */ -#define AT91_MATRIX_CS4A_SMC (0 << 4) -#define AT91_MATRIX_CS4A_SMC_CF1 (1 << 4) -#define AT91_MATRIX_CS5A (1 << 5) /* Chip Select 5 Assignment */ -#define AT91_MATRIX_CS5A_SMC (0 << 5) -#define AT91_MATRIX_CS5A_SMC_CF2 (1 << 5) -#define AT91_MATRIX_DBPUC (1 << 8) /* Data Bus Pull-up Configuration */ - -#endif diff --git a/arch/arm/mach-at91/include/mach/at91cap9.h b/arch/arm/mach-at91/include/mach/at91cap9.h index 9c6af9737485..665993849a7b 100644 --- a/arch/arm/mach-at91/include/mach/at91cap9.h +++ b/arch/arm/mach-at91/include/mach/at91cap9.h @@ -20,8 +20,6 @@ /* * Peripheral identifiers/interrupts. */ -#define AT91_ID_FIQ 0 /* Advanced Interrupt Controller (FIQ) */ -#define AT91_ID_SYS 1 /* System Peripherals */ #define AT91CAP9_ID_PIOABCD 2 /* Parallel IO Controller A, B, C and D */ #define AT91CAP9_ID_MPB0 3 /* MP Block Peripheral 0 */ #define AT91CAP9_ID_MPB1 4 /* MP Block Peripheral 1 */ @@ -123,6 +121,4 @@ #define AT91CAP9_UDPHS_FIFO 0x00600000 /* USB High Speed Device Port */ #define AT91CAP9_UHP_BASE 0x00700000 /* USB Host controller */ -#define CONFIG_DRAM_BASE AT91_CHIPSELECT_6 - #endif diff --git a/arch/arm/mach-at91/include/mach/at91rm9200.h b/arch/arm/mach-at91/include/mach/at91rm9200.h index 78983155a074..99e0f8d02d7b 100644 --- a/arch/arm/mach-at91/include/mach/at91rm9200.h +++ b/arch/arm/mach-at91/include/mach/at91rm9200.h @@ -19,8 +19,6 @@ /* * Peripheral identifiers/interrupts. */ -#define AT91_ID_FIQ 0 /* Advanced Interrupt Controller (FIQ) */ -#define AT91_ID_SYS 1 /* System Peripheral */ #define AT91RM9200_ID_PIOA 2 /* Parallel IO Controller A */ #define AT91RM9200_ID_PIOB 3 /* Parallel IO Controller B */ #define AT91RM9200_ID_PIOC 4 /* Parallel IO Controller C */ diff --git a/arch/arm/mach-at91/include/mach/at91sam9260.h b/arch/arm/mach-at91/include/mach/at91sam9260.h index 4e79036d3b80..8b6bf835cd73 100644 --- a/arch/arm/mach-at91/include/mach/at91sam9260.h +++ b/arch/arm/mach-at91/include/mach/at91sam9260.h @@ -20,8 +20,6 @@ /* * Peripheral identifiers/interrupts. */ -#define AT91_ID_FIQ 0 /* Advanced Interrupt Controller (FIQ) */ -#define AT91_ID_SYS 1 /* System Peripherals */ #define AT91SAM9260_ID_PIOA 2 /* Parallel IO Controller A */ #define AT91SAM9260_ID_PIOB 3 /* Parallel IO Controller B */ #define AT91SAM9260_ID_PIOC 4 /* Parallel IO Controller C */ diff --git a/arch/arm/mach-at91/include/mach/at91sam9261.h b/arch/arm/mach-at91/include/mach/at91sam9261.h index 2b5618518129..eafbddaf523c 100644 --- a/arch/arm/mach-at91/include/mach/at91sam9261.h +++ b/arch/arm/mach-at91/include/mach/at91sam9261.h @@ -18,8 +18,6 @@ /* * Peripheral identifiers/interrupts. */ -#define AT91_ID_FIQ 0 /* Advanced Interrupt Controller (FIQ) */ -#define AT91_ID_SYS 1 /* System Peripherals */ #define AT91SAM9261_ID_PIOA 2 /* Parallel IO Controller A */ #define AT91SAM9261_ID_PIOB 3 /* Parallel IO Controller B */ #define AT91SAM9261_ID_PIOC 4 /* Parallel IO Controller C */ diff --git a/arch/arm/mach-at91/include/mach/at91sam9263.h b/arch/arm/mach-at91/include/mach/at91sam9263.h index 2091f1e42d43..e2d348213a7b 100644 --- a/arch/arm/mach-at91/include/mach/at91sam9263.h +++ b/arch/arm/mach-at91/include/mach/at91sam9263.h @@ -18,8 +18,6 @@ /* * Peripheral identifiers/interrupts. */ -#define AT91_ID_FIQ 0 /* Advanced Interrupt Controller (FIQ) */ -#define AT91_ID_SYS 1 /* System Peripherals */ #define AT91SAM9263_ID_PIOA 2 /* Parallel IO Controller A */ #define AT91SAM9263_ID_PIOB 3 /* Parallel IO Controller B */ #define AT91SAM9263_ID_PIOCDE 4 /* Parallel IO Controller C, D and E */ diff --git a/arch/arm/mach-at91/include/mach/at91sam9g45.h b/arch/arm/mach-at91/include/mach/at91sam9g45.h index a526869aee37..659304aa73d9 100644 --- a/arch/arm/mach-at91/include/mach/at91sam9g45.h +++ b/arch/arm/mach-at91/include/mach/at91sam9g45.h @@ -18,8 +18,6 @@ /* * Peripheral identifiers/interrupts. */ -#define AT91_ID_FIQ 0 /* Advanced Interrupt Controller (FIQ) */ -#define AT91_ID_SYS 1 /* System Controller Interrupt */ #define AT91SAM9G45_ID_PIOA 2 /* Parallel I/O Controller A */ #define AT91SAM9G45_ID_PIOB 3 /* Parallel I/O Controller B */ #define AT91SAM9G45_ID_PIOC 4 /* Parallel I/O Controller C */ @@ -131,8 +129,6 @@ #define AT91SAM9G45_EHCI_BASE 0x00800000 /* USB Host controller (EHCI) */ #define AT91SAM9G45_VDEC_BASE 0x00900000 /* Video Decoder Controller */ -#define CONFIG_DRAM_BASE AT91_CHIPSELECT_6 - #define CONSISTENT_DMA_SIZE SZ_4M /* diff --git a/arch/arm/mach-at91/include/mach/at91sam9rl.h b/arch/arm/mach-at91/include/mach/at91sam9rl.h index 87ba8517ad98..41dbbe61055c 100644 --- a/arch/arm/mach-at91/include/mach/at91sam9rl.h +++ b/arch/arm/mach-at91/include/mach/at91sam9rl.h @@ -17,8 +17,6 @@ /* * Peripheral identifiers/interrupts. */ -#define AT91_ID_FIQ 0 /* Advanced Interrupt Controller (FIQ) */ -#define AT91_ID_SYS 1 /* System Controller */ #define AT91SAM9RL_ID_PIOA 2 /* Parallel IO Controller A */ #define AT91SAM9RL_ID_PIOB 3 /* Parallel IO Controller B */ #define AT91SAM9RL_ID_PIOC 4 /* Parallel IO Controller C */ diff --git a/arch/arm/mach-at91/include/mach/at91x40.h b/arch/arm/mach-at91/include/mach/at91x40.h index 063ac44a0204..a152ff87e688 100644 --- a/arch/arm/mach-at91/include/mach/at91x40.h +++ b/arch/arm/mach-at91/include/mach/at91x40.h @@ -15,8 +15,6 @@ /* * IRQ list. */ -#define AT91_ID_FIQ 0 /* FIQ */ -#define AT91_ID_SYS 1 /* System Peripheral */ #define AT91X40_ID_USART0 2 /* USART port 0 */ #define AT91X40_ID_USART1 3 /* USART port 1 */ #define AT91X40_ID_TC0 4 /* Timer/Counter 0 */ diff --git a/arch/arm/mach-at91/include/mach/board.h b/arch/arm/mach-at91/include/mach/board.h index 2b499eb343a1..ed544a0d5a1d 100644 --- a/arch/arm/mach-at91/include/mach/board.h +++ b/arch/arm/mach-at91/include/mach/board.h @@ -90,7 +90,7 @@ struct at91_eth_data { extern void __init at91_add_device_eth(struct at91_eth_data *data); #if defined(CONFIG_ARCH_AT91SAM9260) || defined(CONFIG_ARCH_AT91SAM9263) || defined(CONFIG_ARCH_AT91SAM9G20) || defined(CONFIG_ARCH_AT91CAP9) \ - || defined(CONFIG_ARCH_AT91SAM9G45) || defined(CONFIG_ARCH_AT572D940HF) + || defined(CONFIG_ARCH_AT91SAM9G45) #define eth_platform_data at91_eth_data #endif @@ -140,6 +140,7 @@ extern void __init at91_set_serial_console(unsigned portnr); extern struct platform_device *atmel_default_console_device; struct atmel_uart_data { + int num; /* port num */ short use_dma_tx; /* use transmit DMA? */ short use_dma_rx; /* use receive DMA? */ void __iomem *regs; /* virt. base address, if any */ @@ -203,9 +204,6 @@ extern void __init at91_init_leds(u8 cpu_led, u8 timer_led); extern void __init at91_gpio_leds(struct gpio_led *leds, int nr); extern void __init at91_pwm_leds(struct gpio_led *leds, int nr); - /* AT572D940HF DSP */ -extern void __init at91_add_device_mAgic(void); - /* FIXME: this needs a better location, but gets stuff building again */ extern int at91_suspend_entering_slow_clock(void); diff --git a/arch/arm/mach-at91/include/mach/clkdev.h b/arch/arm/mach-at91/include/mach/clkdev.h new file mode 100644 index 000000000000..04b37a89801c --- /dev/null +++ b/arch/arm/mach-at91/include/mach/clkdev.h @@ -0,0 +1,7 @@ +#ifndef __ASM_MACH_CLKDEV_H +#define __ASM_MACH_CLKDEV_H + +#define __clk_get(clk) ({ 1; }) +#define __clk_put(clk) do { } while (0) + +#endif diff --git a/arch/arm/mach-at91/include/mach/cpu.h b/arch/arm/mach-at91/include/mach/cpu.h index 3bef931d0b1c..df966c2bc2d4 100644 --- a/arch/arm/mach-at91/include/mach/cpu.h +++ b/arch/arm/mach-at91/include/mach/cpu.h @@ -27,14 +27,13 @@ #define ARCH_ID_AT91SAM9G45 0x819b05a0 #define ARCH_ID_AT91SAM9G45MRL 0x819b05a2 /* aka 9G45-ES2 & non ES lots */ #define ARCH_ID_AT91SAM9G45ES 0x819b05a1 /* 9G45-ES (Engineering Sample) */ +#define ARCH_ID_AT91SAM9X5 0x819a05a0 #define ARCH_ID_AT91CAP9 0x039A03A0 #define ARCH_ID_AT91SAM9XE128 0x329973a0 #define ARCH_ID_AT91SAM9XE256 0x329a93a0 #define ARCH_ID_AT91SAM9XE512 0x329aa3a0 -#define ARCH_ID_AT572D940HF 0x0e0303e0 - #define ARCH_ID_AT91M40800 0x14080044 #define ARCH_ID_AT91R40807 0x44080746 #define ARCH_ID_AT91M40807 0x14080745 @@ -55,6 +54,12 @@ static inline unsigned long at91_cpu_fully_identify(void) #define ARCH_EXID_AT91SAM9G46 0x00000003 #define ARCH_EXID_AT91SAM9G45 0x00000004 +#define ARCH_EXID_AT91SAM9G15 0x00000000 +#define ARCH_EXID_AT91SAM9G35 0x00000001 +#define ARCH_EXID_AT91SAM9X35 0x00000002 +#define ARCH_EXID_AT91SAM9G25 0x00000003 +#define ARCH_EXID_AT91SAM9X25 0x00000004 + static inline unsigned long at91_exid_identify(void) { return at91_sys_read(AT91_DBGU_EXID); @@ -83,9 +88,16 @@ static inline unsigned long at91cap9_rev_identify(void) #endif #ifdef CONFIG_ARCH_AT91RM9200 +extern int rm9200_type; +#define ARCH_REVISON_9200_BGA (0 << 0) +#define ARCH_REVISON_9200_PQFP (1 << 0) #define cpu_is_at91rm9200() (at91_cpu_identify() == ARCH_ID_AT91RM9200) +#define cpu_is_at91rm9200_bga() (!cpu_is_at91rm9200_pqfp()) +#define cpu_is_at91rm9200_pqfp() (cpu_is_at91rm9200() && rm9200_type & ARCH_REVISON_9200_PQFP) #else #define cpu_is_at91rm9200() (0) +#define cpu_is_at91rm9200_bga() (0) +#define cpu_is_at91rm9200_pqfp() (0) #endif #ifdef CONFIG_ARCH_AT91SAM9260 @@ -143,6 +155,27 @@ static inline unsigned long at91cap9_rev_identify(void) #define cpu_is_at91sam9m11() (0) #endif +#ifdef CONFIG_ARCH_AT91SAM9X5 +#define cpu_is_at91sam9x5() (at91_cpu_identify() == ARCH_ID_AT91SAM9X5) +#define cpu_is_at91sam9g15() (cpu_is_at91sam9x5() && \ + (at91_exid_identify() == ARCH_EXID_AT91SAM9G15)) +#define cpu_is_at91sam9g35() (cpu_is_at91sam9x5() && \ + (at91_exid_identify() == ARCH_EXID_AT91SAM9G35)) +#define cpu_is_at91sam9x35() (cpu_is_at91sam9x5() && \ + (at91_exid_identify() == ARCH_EXID_AT91SAM9X35)) +#define cpu_is_at91sam9g25() (cpu_is_at91sam9x5() && \ + (at91_exid_identify() == ARCH_EXID_AT91SAM9G25)) +#define cpu_is_at91sam9x25() (cpu_is_at91sam9x5() && \ + (at91_exid_identify() == ARCH_EXID_AT91SAM9X25)) +#else +#define cpu_is_at91sam9x5() (0) +#define cpu_is_at91sam9g15() (0) +#define cpu_is_at91sam9g35() (0) +#define cpu_is_at91sam9x35() (0) +#define cpu_is_at91sam9g25() (0) +#define cpu_is_at91sam9x25() (0) +#endif + #ifdef CONFIG_ARCH_AT91CAP9 #define cpu_is_at91cap9() (at91_cpu_identify() == ARCH_ID_AT91CAP9) #define cpu_is_at91cap9_revB() (at91cap9_rev_identify() == ARCH_REVISION_CAP9_B) @@ -153,12 +186,6 @@ static inline unsigned long at91cap9_rev_identify(void) #define cpu_is_at91cap9_revC() (0) #endif -#ifdef CONFIG_ARCH_AT572D940HF -#define cpu_is_at572d940hf() (at91_cpu_identify() == ARCH_ID_AT572D940HF) -#else -#define cpu_is_at572d940hf() (0) -#endif - /* * Since this is ARM, we will never run on any AVR32 CPU. But these * definitions may reduce clutter in common drivers. diff --git a/arch/arm/mach-at91/include/mach/hardware.h b/arch/arm/mach-at91/include/mach/hardware.h index 3d64a75e3ed5..1008b9fb5074 100644 --- a/arch/arm/mach-at91/include/mach/hardware.h +++ b/arch/arm/mach-at91/include/mach/hardware.h @@ -32,13 +32,17 @@ #include #elif defined(CONFIG_ARCH_AT91X40) #include -#elif defined(CONFIG_ARCH_AT572D940HF) -#include #else #error "Unsupported AT91 processor" #endif +/* + * Peripheral identifiers/interrupts. + */ +#define AT91_ID_FIQ 0 /* Advanced Interrupt Controller (FIQ) */ +#define AT91_ID_SYS 1 /* System Peripherals */ + #ifdef CONFIG_MMU /* * Remap the peripherals from address 0xFFF78000 .. 0xFFFFFFFF @@ -82,13 +86,6 @@ #define AT91_CHIPSELECT_6 0x70000000 #define AT91_CHIPSELECT_7 0x80000000 -/* SDRAM */ -#ifdef CONFIG_DRAM_BASE -#define AT91_SDRAM_BASE CONFIG_DRAM_BASE -#else -#define AT91_SDRAM_BASE AT91_CHIPSELECT_1 -#endif - /* Clocks */ #define AT91_SLOW_CLOCK 32768 /* slow clock */ diff --git a/arch/arm/mach-at91/include/mach/memory.h b/arch/arm/mach-at91/include/mach/memory.h index c2cfe5040642..401c207f2f39 100644 --- a/arch/arm/mach-at91/include/mach/memory.h +++ b/arch/arm/mach-at91/include/mach/memory.h @@ -23,6 +23,4 @@ #include -#define PLAT_PHYS_OFFSET (AT91_SDRAM_BASE) - #endif diff --git a/arch/arm/mach-at91/include/mach/stamp9g20.h b/arch/arm/mach-at91/include/mach/stamp9g20.h index 6120f9c46d59..f62c0abca4b4 100644 --- a/arch/arm/mach-at91/include/mach/stamp9g20.h +++ b/arch/arm/mach-at91/include/mach/stamp9g20.h @@ -1,7 +1,7 @@ #ifndef __MACH_STAMP9G20_H #define __MACH_STAMP9G20_H -void stamp9g20_map_io(void); +void stamp9g20_init_early(void); void stamp9g20_board_init(void); #endif diff --git a/arch/arm/mach-at91/include/mach/system_rev.h b/arch/arm/mach-at91/include/mach/system_rev.h new file mode 100644 index 000000000000..b855ee75f72c --- /dev/null +++ b/arch/arm/mach-at91/include/mach/system_rev.h @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2011 Jean-Christophe PLAGNIOL-VILLARD + * + * Under GPLv2 only + */ + +#ifndef __ARCH_SYSTEM_REV_H__ +#define __ARCH_SYSTEM_REV_H__ + +/* + * board revision encoding + * mach specific + * the 16-31 bit are reserved for at91 generic information + * + * bit 31: + * 0 => nand 16 bit + * 1 => nand 8 bit + */ +#define BOARD_HAVE_NAND_8BIT (1 << 31) +static int inline board_have_nand_8bit(void) +{ + return system_rev & BOARD_HAVE_NAND_8BIT; +} + +#endif /* __ARCH_SYSTEM_REV_H__ */ diff --git a/arch/arm/mach-at91/include/mach/timex.h b/arch/arm/mach-at91/include/mach/timex.h index 05a6e8af80c4..31ac2d97f14c 100644 --- a/arch/arm/mach-at91/include/mach/timex.h +++ b/arch/arm/mach-at91/include/mach/timex.h @@ -82,11 +82,6 @@ #define AT91X40_MASTER_CLOCK 40000000 #define CLOCK_TICK_RATE (AT91X40_MASTER_CLOCK) -#elif defined(CONFIG_ARCH_AT572D940HF) - -#define AT572D940HF_MASTER_CLOCK 80000000 -#define CLOCK_TICK_RATE (AT572D940HF_MASTER_CLOCK/16) - #endif #endif diff --git a/arch/arm/mach-bcmring/arch.c b/arch/arm/mach-bcmring/arch.c index 73eb066d2329..a604b9ebb501 100644 --- a/arch/arm/mach-bcmring/arch.c +++ b/arch/arm/mach-bcmring/arch.c @@ -169,6 +169,7 @@ MACHINE_START(BCMRING, "BCMRING") /* Maintainer: Broadcom Corporation */ .fixup = bcmring_fixup, .map_io = bcmring_map_io, + .init_early = bcmring_init_early, .init_irq = bcmring_init_irq, .timer = &bcmring_timer, .init_machine = bcmring_init_machine diff --git a/arch/arm/mach-bcmring/core.c b/arch/arm/mach-bcmring/core.c index 8fc2035759fb..43eadbcc29ed 100644 --- a/arch/arm/mach-bcmring/core.c +++ b/arch/arm/mach-bcmring/core.c @@ -28,8 +28,6 @@ #include #include #include -#include -#include #include #include @@ -37,6 +35,7 @@ #include #include #include +#include #include #include @@ -97,6 +96,35 @@ static struct clk dummy_apb_pclk = { .mode = CLK_MODE_XTAL, }; +/* Timer 0 - 25 MHz, Timer3 at bus clock rate, typically 150-166 MHz */ +#if defined(CONFIG_ARCH_FPGA11107) +/* fpga cpu/bus are currently 30 times slower so scale frequency as well to */ +/* slow down Linux's sense of time */ +#define TIMER0_FREQUENCY_MHZ (tmrHw_LOW_FREQUENCY_MHZ * 30) +#define TIMER1_FREQUENCY_MHZ (tmrHw_LOW_FREQUENCY_MHZ * 30) +#define TIMER3_FREQUENCY_MHZ (tmrHw_HIGH_FREQUENCY_MHZ * 30) +#define TIMER3_FREQUENCY_KHZ (tmrHw_HIGH_FREQUENCY_HZ / 1000 * 30) +#else +#define TIMER0_FREQUENCY_MHZ tmrHw_LOW_FREQUENCY_MHZ +#define TIMER1_FREQUENCY_MHZ tmrHw_LOW_FREQUENCY_MHZ +#define TIMER3_FREQUENCY_MHZ tmrHw_HIGH_FREQUENCY_MHZ +#define TIMER3_FREQUENCY_KHZ (tmrHw_HIGH_FREQUENCY_HZ / 1000) +#endif + +static struct clk sp804_timer012_clk = { + .name = "sp804-timer-0,1,2", + .type = CLK_TYPE_PRIMARY, + .mode = CLK_MODE_XTAL, + .rate_hz = TIMER1_FREQUENCY_MHZ * 1000000, +}; + +static struct clk sp804_timer3_clk = { + .name = "sp804-timer-3", + .type = CLK_TYPE_PRIMARY, + .mode = CLK_MODE_XTAL, + .rate_hz = TIMER3_FREQUENCY_KHZ * 1000, +}; + static struct clk_lookup lookups[] = { { /* Bus clock */ .con_id = "apb_pclk", @@ -107,6 +135,18 @@ static struct clk_lookup lookups[] = { }, { /* UART1 */ .dev_id = "uartb", .clk = &uart_clk, + }, { /* SP804 timer 0 */ + .dev_id = "sp804", + .con_id = "timer0", + .clk = &sp804_timer012_clk, + }, { /* SP804 timer 1 */ + .dev_id = "sp804", + .con_id = "timer1", + .clk = &sp804_timer012_clk, + }, { /* SP804 timer 3 */ + .dev_id = "sp804", + .con_id = "timer3", + .clk = &sp804_timer3_clk, } }; @@ -151,8 +191,6 @@ void __init bcmring_amba_init(void) chipcHw_busInterfaceClockEnable(bus_clock); - clkdev_add_table(lookups, ARRAY_SIZE(lookups)); - for (i = 0; i < ARRAY_SIZE(amba_devs); i++) { struct amba_device *d = amba_devs[i]; amba_device_register(d, &iomem_resource); @@ -162,170 +200,18 @@ void __init bcmring_amba_init(void) /* * Where is the timer (VA)? */ -#define TIMER0_VA_BASE MM_IO_BASE_TMR -#define TIMER1_VA_BASE (MM_IO_BASE_TMR + 0x20) -#define TIMER2_VA_BASE (MM_IO_BASE_TMR + 0x40) -#define TIMER3_VA_BASE (MM_IO_BASE_TMR + 0x60) - -/* Timer 0 - 25 MHz, Timer3 at bus clock rate, typically 150-166 MHz */ -#if defined(CONFIG_ARCH_FPGA11107) -/* fpga cpu/bus are currently 30 times slower so scale frequency as well to */ -/* slow down Linux's sense of time */ -#define TIMER0_FREQUENCY_MHZ (tmrHw_LOW_FREQUENCY_MHZ * 30) -#define TIMER1_FREQUENCY_MHZ (tmrHw_LOW_FREQUENCY_MHZ * 30) -#define TIMER3_FREQUENCY_MHZ (tmrHw_HIGH_FREQUENCY_MHZ * 30) -#define TIMER3_FREQUENCY_KHZ (tmrHw_HIGH_FREQUENCY_HZ / 1000 * 30) -#else -#define TIMER0_FREQUENCY_MHZ tmrHw_LOW_FREQUENCY_MHZ -#define TIMER1_FREQUENCY_MHZ tmrHw_LOW_FREQUENCY_MHZ -#define TIMER3_FREQUENCY_MHZ tmrHw_HIGH_FREQUENCY_MHZ -#define TIMER3_FREQUENCY_KHZ (tmrHw_HIGH_FREQUENCY_HZ / 1000) -#endif - -#define TICKS_PER_uSEC TIMER0_FREQUENCY_MHZ - -/* - * These are useconds NOT ticks. - * - */ -#define mSEC_1 1000 -#define mSEC_5 (mSEC_1 * 5) -#define mSEC_10 (mSEC_1 * 10) -#define mSEC_25 (mSEC_1 * 25) -#define SEC_1 (mSEC_1 * 1000) - -/* - * How long is the timer interval? - */ -#define TIMER_INTERVAL (TICKS_PER_uSEC * mSEC_10) -#if TIMER_INTERVAL >= 0x100000 -#define TIMER_RELOAD (TIMER_INTERVAL >> 8) -#define TIMER_DIVISOR (TIMER_CTRL_DIV256) -#define TICKS2USECS(x) (256 * (x) / TICKS_PER_uSEC) -#elif TIMER_INTERVAL >= 0x10000 -#define TIMER_RELOAD (TIMER_INTERVAL >> 4) /* Divide by 16 */ -#define TIMER_DIVISOR (TIMER_CTRL_DIV16) -#define TICKS2USECS(x) (16 * (x) / TICKS_PER_uSEC) -#else -#define TIMER_RELOAD (TIMER_INTERVAL) -#define TIMER_DIVISOR (TIMER_CTRL_DIV1) -#define TICKS2USECS(x) ((x) / TICKS_PER_uSEC) -#endif - -static void timer_set_mode(enum clock_event_mode mode, - struct clock_event_device *clk) -{ - unsigned long ctrl; - - switch (mode) { - case CLOCK_EVT_MODE_PERIODIC: - writel(TIMER_RELOAD, TIMER0_VA_BASE + TIMER_LOAD); - - ctrl = TIMER_CTRL_PERIODIC; - ctrl |= - TIMER_DIVISOR | TIMER_CTRL_32BIT | TIMER_CTRL_IE | - TIMER_CTRL_ENABLE; - break; - case CLOCK_EVT_MODE_ONESHOT: - /* period set, and timer enabled in 'next_event' hook */ - ctrl = TIMER_CTRL_ONESHOT; - ctrl |= TIMER_DIVISOR | TIMER_CTRL_32BIT | TIMER_CTRL_IE; - break; - case CLOCK_EVT_MODE_UNUSED: - case CLOCK_EVT_MODE_SHUTDOWN: - default: - ctrl = 0; - } - - writel(ctrl, TIMER0_VA_BASE + TIMER_CTRL); -} - -static int timer_set_next_event(unsigned long evt, - struct clock_event_device *unused) -{ - unsigned long ctrl = readl(TIMER0_VA_BASE + TIMER_CTRL); - - writel(evt, TIMER0_VA_BASE + TIMER_LOAD); - writel(ctrl | TIMER_CTRL_ENABLE, TIMER0_VA_BASE + TIMER_CTRL); - - return 0; -} - -static struct clock_event_device timer0_clockevent = { - .name = "timer0", - .shift = 32, - .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, - .set_mode = timer_set_mode, - .set_next_event = timer_set_next_event, -}; - -/* - * IRQ handler for the timer - */ -static irqreturn_t bcmring_timer_interrupt(int irq, void *dev_id) -{ - struct clock_event_device *evt = &timer0_clockevent; - - writel(1, TIMER0_VA_BASE + TIMER_INTCLR); - - evt->event_handler(evt); - - return IRQ_HANDLED; -} - -static struct irqaction bcmring_timer_irq = { - .name = "bcmring Timer Tick", - .flags = IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL, - .handler = bcmring_timer_interrupt, -}; - -static cycle_t bcmring_get_cycles_timer1(struct clocksource *cs) -{ - return ~readl(TIMER1_VA_BASE + TIMER_VALUE); -} - -static cycle_t bcmring_get_cycles_timer3(struct clocksource *cs) -{ - return ~readl(TIMER3_VA_BASE + TIMER_VALUE); -} - -static struct clocksource clocksource_bcmring_timer1 = { - .name = "timer1", - .rating = 200, - .read = bcmring_get_cycles_timer1, - .mask = CLOCKSOURCE_MASK(32), - .flags = CLOCK_SOURCE_IS_CONTINUOUS, -}; - -static struct clocksource clocksource_bcmring_timer3 = { - .name = "timer3", - .rating = 100, - .read = bcmring_get_cycles_timer3, - .mask = CLOCKSOURCE_MASK(32), - .flags = CLOCK_SOURCE_IS_CONTINUOUS, -}; +#define TIMER0_VA_BASE ((void __iomem *)MM_IO_BASE_TMR) +#define TIMER1_VA_BASE ((void __iomem *)(MM_IO_BASE_TMR + 0x20)) +#define TIMER2_VA_BASE ((void __iomem *)(MM_IO_BASE_TMR + 0x40)) +#define TIMER3_VA_BASE ((void __iomem *)(MM_IO_BASE_TMR + 0x60)) static int __init bcmring_clocksource_init(void) { /* setup timer1 as free-running clocksource */ - writel(0, TIMER1_VA_BASE + TIMER_CTRL); - writel(0xffffffff, TIMER1_VA_BASE + TIMER_LOAD); - writel(0xffffffff, TIMER1_VA_BASE + TIMER_VALUE); - writel(TIMER_CTRL_32BIT | TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC, - TIMER1_VA_BASE + TIMER_CTRL); - - clocksource_register_khz(&clocksource_bcmring_timer1, - TIMER1_FREQUENCY_MHZ * 1000); + sp804_clocksource_init(TIMER1_VA_BASE, "timer1"); /* setup timer3 as free-running clocksource */ - writel(0, TIMER3_VA_BASE + TIMER_CTRL); - writel(0xffffffff, TIMER3_VA_BASE + TIMER_LOAD); - writel(0xffffffff, TIMER3_VA_BASE + TIMER_VALUE); - writel(TIMER_CTRL_32BIT | TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC, - TIMER3_VA_BASE + TIMER_CTRL); - - clocksource_register_khz(&clocksource_bcmring_timer3, - TIMER3_FREQUENCY_KHZ); + sp804_clocksource_init(TIMER3_VA_BASE, "timer3"); return 0; } @@ -347,21 +233,16 @@ void __init bcmring_init_timer(void) /* * Make irqs happen for the system timer */ - setup_irq(IRQ_TIMER0, &bcmring_timer_irq); - bcmring_clocksource_init(); - timer0_clockevent.mult = - div_sc(1000000, NSEC_PER_SEC, timer0_clockevent.shift); - timer0_clockevent.max_delta_ns = - clockevent_delta2ns(0xffffffff, &timer0_clockevent); - timer0_clockevent.min_delta_ns = - clockevent_delta2ns(0xf, &timer0_clockevent); - - timer0_clockevent.cpumask = cpumask_of(0); - clockevents_register_device(&timer0_clockevent); + sp804_clockevents_register(TIMER0_VA_BASE, IRQ_TIMER0, "timer0"); } struct sys_timer bcmring_timer = { .init = bcmring_init_timer, }; + +void __init bcmring_init_early(void) +{ + clkdev_add_table(lookups, ARRAY_SIZE(lookups)); +} diff --git a/arch/arm/mach-bcmring/core.h b/arch/arm/mach-bcmring/core.h index b197ba48e36e..e0e02c48f9b1 100644 --- a/arch/arm/mach-bcmring/core.h +++ b/arch/arm/mach-bcmring/core.h @@ -25,6 +25,7 @@ void __init bcmring_amba_init(void); void __init bcmring_map_io(void); void __init bcmring_init_irq(void); +void __init bcmring_init_early(void); extern struct sys_timer bcmring_timer; #endif diff --git a/arch/arm/mach-davinci/Kconfig b/arch/arm/mach-davinci/Kconfig index 32f147998cd9..c0deacae778d 100644 --- a/arch/arm/mach-davinci/Kconfig +++ b/arch/arm/mach-davinci/Kconfig @@ -63,6 +63,7 @@ config MACH_DAVINCI_EVM depends on ARCH_DAVINCI_DM644x select MISC_DEVICES select EEPROM_AT24 + select I2C help Configure this option to specify the whether the board used for development is a DM644x EVM @@ -72,6 +73,7 @@ config MACH_SFFSDR depends on ARCH_DAVINCI_DM644x select MISC_DEVICES select EEPROM_AT24 + select I2C help Say Y here to select the Lyrtech Small Form Factor Software Defined Radio (SFFSDR) board. @@ -105,6 +107,7 @@ config MACH_DAVINCI_DM6467_EVM select MACH_DAVINCI_DM6467TEVM select MISC_DEVICES select EEPROM_AT24 + select I2C help Configure this option to specify the whether the board used for development is a DM6467 EVM @@ -118,6 +121,7 @@ config MACH_DAVINCI_DM365_EVM depends on ARCH_DAVINCI_DM365 select MISC_DEVICES select EEPROM_AT24 + select I2C help Configure this option to specify whether the board used for development is a DM365 EVM @@ -129,6 +133,7 @@ config MACH_DAVINCI_DA830_EVM select GPIO_PCF857X select MISC_DEVICES select EEPROM_AT24 + select I2C help Say Y here to select the TI DA830/OMAP-L137/AM17x Evaluation Module. @@ -205,6 +210,7 @@ config MACH_MITYOMAPL138 depends on ARCH_DAVINCI_DA850 select MISC_DEVICES select EEPROM_AT24 + select I2C help Say Y here to select the Critical Link MityDSP-L138/MityARM-1808 System on Module. Information on this SoM may be found at diff --git a/arch/arm/mach-davinci/board-mityomapl138.c b/arch/arm/mach-davinci/board-mityomapl138.c index 2aa79c54f98e..606a6f27ed6c 100644 --- a/arch/arm/mach-davinci/board-mityomapl138.c +++ b/arch/arm/mach-davinci/board-mityomapl138.c @@ -29,7 +29,7 @@ #include #include -#define MITYOMAPL138_PHY_ID "0:03" +#define MITYOMAPL138_PHY_ID "" #define FACTORY_CONFIG_MAGIC 0x012C0138 #define FACTORY_CONFIG_VERSION 0x00010001 @@ -414,7 +414,7 @@ static struct resource mityomapl138_nandflash_resource[] = { static struct platform_device mityomapl138_nandflash_device = { .name = "davinci_nand", - .id = 0, + .id = 1, .dev = { .platform_data = &mityomapl138_nandflash_data, }, diff --git a/arch/arm/mach-davinci/cpufreq.c b/arch/arm/mach-davinci/cpufreq.c index 0a95be1512bb..41669ecc1f91 100644 --- a/arch/arm/mach-davinci/cpufreq.c +++ b/arch/arm/mach-davinci/cpufreq.c @@ -94,9 +94,7 @@ static int davinci_target(struct cpufreq_policy *policy, if (freqs.old == freqs.new) return ret; - cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, - dev_driver_string(cpufreq.dev), - "transition: %u --> %u\n", freqs.old, freqs.new); + dev_dbg(&cpufreq.dev, "transition: %u --> %u\n", freqs.old, freqs.new); ret = cpufreq_frequency_table_target(policy, pdata->freq_table, freqs.new, relation, &idx); diff --git a/arch/arm/mach-davinci/da850.c b/arch/arm/mach-davinci/da850.c index b95b9196deed..133aac405853 100644 --- a/arch/arm/mach-davinci/da850.c +++ b/arch/arm/mach-davinci/da850.c @@ -1055,7 +1055,7 @@ int da850_register_pm(struct platform_device *pdev) if (!pdata->cpupll_reg_base) return -ENOMEM; - pdata->ddrpll_reg_base = ioremap(DA8XX_PLL1_BASE, SZ_4K); + pdata->ddrpll_reg_base = ioremap(DA850_PLL1_BASE, SZ_4K); if (!pdata->ddrpll_reg_base) { ret = -ENOMEM; goto no_ddrpll_mem; diff --git a/arch/arm/mach-davinci/devices-da8xx.c b/arch/arm/mach-davinci/devices-da8xx.c index 625d4b66718b..fc4e98ea7543 100644 --- a/arch/arm/mach-davinci/devices-da8xx.c +++ b/arch/arm/mach-davinci/devices-da8xx.c @@ -24,22 +24,25 @@ #include "clock.h" #define DA8XX_TPCC_BASE 0x01c00000 -#define DA850_MMCSD1_BASE 0x01e1b000 -#define DA850_TPCC1_BASE 0x01e30000 #define DA8XX_TPTC0_BASE 0x01c08000 #define DA8XX_TPTC1_BASE 0x01c08400 -#define DA850_TPTC2_BASE 0x01e38000 #define DA8XX_WDOG_BASE 0x01c21000 /* DA8XX_TIMER64P1_BASE */ #define DA8XX_I2C0_BASE 0x01c22000 -#define DA8XX_RTC_BASE 0x01C23000 +#define DA8XX_RTC_BASE 0x01c23000 +#define DA8XX_MMCSD0_BASE 0x01c40000 +#define DA8XX_SPI0_BASE 0x01c41000 +#define DA830_SPI1_BASE 0x01e12000 +#define DA8XX_LCD_CNTRL_BASE 0x01e13000 +#define DA850_MMCSD1_BASE 0x01e1b000 #define DA8XX_EMAC_CPPI_PORT_BASE 0x01e20000 #define DA8XX_EMAC_CPGMACSS_BASE 0x01e22000 #define DA8XX_EMAC_CPGMAC_BASE 0x01e23000 #define DA8XX_EMAC_MDIO_BASE 0x01e24000 -#define DA8XX_GPIO_BASE 0x01e26000 #define DA8XX_I2C1_BASE 0x01e28000 -#define DA8XX_SPI0_BASE 0x01c41000 -#define DA8XX_SPI1_BASE 0x01f0e000 +#define DA850_TPCC1_BASE 0x01e30000 +#define DA850_TPTC2_BASE 0x01e38000 +#define DA850_SPI1_BASE 0x01f0e000 +#define DA8XX_DDR2_CTL_BASE 0xb0000000 #define DA8XX_EMAC_CTRL_REG_OFFSET 0x3000 #define DA8XX_EMAC_MOD_REG_OFFSET 0x2000 @@ -491,7 +494,7 @@ static struct platform_device da850_mcasp_device = { .resource = da850_mcasp_resources, }; -struct platform_device davinci_pcm_device = { +static struct platform_device davinci_pcm_device = { .name = "davinci-pcm-audio", .id = -1, }; @@ -762,8 +765,8 @@ static struct resource da8xx_spi0_resources[] = { static struct resource da8xx_spi1_resources[] = { [0] = { - .start = DA8XX_SPI1_BASE, - .end = DA8XX_SPI1_BASE + SZ_4K - 1, + .start = DA830_SPI1_BASE, + .end = DA830_SPI1_BASE + SZ_4K - 1, .flags = IORESOURCE_MEM, }, [1] = { @@ -832,5 +835,10 @@ int __init da8xx_register_spi(int instance, struct spi_board_info *info, da8xx_spi_pdata[instance].num_chipselect = len; + if (instance == 1 && cpu_is_davinci_da850()) { + da8xx_spi1_resources[0].start = DA850_SPI1_BASE; + da8xx_spi1_resources[0].end = DA850_SPI1_BASE + SZ_4K - 1; + } + return platform_device_register(&da8xx_spi_device[instance]); } diff --git a/arch/arm/mach-davinci/devices.c b/arch/arm/mach-davinci/devices.c index 22ebc64bc9d9..806a2f02b980 100644 --- a/arch/arm/mach-davinci/devices.c +++ b/arch/arm/mach-davinci/devices.c @@ -33,6 +33,9 @@ #define DM365_MMCSD0_BASE 0x01D11000 #define DM365_MMCSD1_BASE 0x01D00000 +/* System control register offsets */ +#define DM64XX_VDD3P3V_PWDN 0x48 + static struct resource i2c_resources[] = { { .start = DAVINCI_I2C_BASE, @@ -295,7 +298,7 @@ static void davinci_init_wdt(void) /*-------------------------------------------------------------------------*/ -struct platform_device davinci_pcm_device = { +static struct platform_device davinci_pcm_device = { .name = "davinci-pcm-audio", .id = -1, }; diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c index f68012239641..a3a94e9c9378 100644 --- a/arch/arm/mach-davinci/dm355.c +++ b/arch/arm/mach-davinci/dm355.c @@ -314,7 +314,7 @@ static struct clk timer2_clk = { .name = "timer2", .parent = &pll1_aux_clk, .lpsc = DAVINCI_LPSC_TIMER2, - .usecount = 1, /* REVISIT: why can't' this be disabled? */ + .usecount = 1, /* REVISIT: why can't this be disabled? */ }; static struct clk timer3_clk = { diff --git a/arch/arm/mach-davinci/dm644x.c b/arch/arm/mach-davinci/dm644x.c index 5f8a65424184..4c82c2716293 100644 --- a/arch/arm/mach-davinci/dm644x.c +++ b/arch/arm/mach-davinci/dm644x.c @@ -274,7 +274,7 @@ static struct clk timer2_clk = { .name = "timer2", .parent = &pll1_aux_clk, .lpsc = DAVINCI_LPSC_TIMER2, - .usecount = 1, /* REVISIT: why can't' this be disabled? */ + .usecount = 1, /* REVISIT: why can't this be disabled? */ }; static struct clk_lookup dm644x_clks[] = { diff --git a/arch/arm/mach-davinci/gpio.c b/arch/arm/mach-davinci/gpio.c index a0b838894ac9..e7221398e5af 100644 --- a/arch/arm/mach-davinci/gpio.c +++ b/arch/arm/mach-davinci/gpio.c @@ -252,9 +252,11 @@ static struct irq_chip gpio_irqchip = { static void gpio_irq_handler(unsigned irq, struct irq_desc *desc) { - struct davinci_gpio_regs __iomem *g = irq2regs(irq); + struct davinci_gpio_regs __iomem *g; u32 mask = 0xffff; + g = (__force struct davinci_gpio_regs __iomem *) irq_desc_get_handler_data(desc); + /* we only care about one bank */ if (irq & 1) mask <<= 16; @@ -422,8 +424,7 @@ static int __init davinci_gpio_irq_setup(void) /* set up all irqs in this bank */ irq_set_chained_handler(bank_irq, gpio_irq_handler); - irq_set_chip_data(bank_irq, (__force void *)g); - irq_set_handler_data(bank_irq, (void *)irq); + irq_set_handler_data(bank_irq, (__force void *)g); for (i = 0; i < 16 && gpio < ngpio; i++, irq++, gpio++) { irq_set_chip(irq, &gpio_irqchip); diff --git a/arch/arm/mach-davinci/include/mach/da8xx.h b/arch/arm/mach-davinci/include/mach/da8xx.h index e4fc1af8500e..ad64da713fc8 100644 --- a/arch/arm/mach-davinci/include/mach/da8xx.h +++ b/arch/arm/mach-davinci/include/mach/da8xx.h @@ -64,13 +64,9 @@ extern unsigned int da850_max_speed; #define DA8XX_TIMER64P1_BASE 0x01c21000 #define DA8XX_GPIO_BASE 0x01e26000 #define DA8XX_PSC1_BASE 0x01e27000 -#define DA8XX_LCD_CNTRL_BASE 0x01e13000 -#define DA8XX_PLL1_BASE 0x01e1a000 -#define DA8XX_MMCSD0_BASE 0x01c40000 #define DA8XX_AEMIF_CS2_BASE 0x60000000 #define DA8XX_AEMIF_CS3_BASE 0x62000000 #define DA8XX_AEMIF_CTL_BASE 0x68000000 -#define DA8XX_DDR2_CTL_BASE 0xb0000000 #define DA8XX_ARM_RAM_BASE 0xffff0000 void __init da830_init(void); diff --git a/arch/arm/mach-davinci/include/mach/debug-macro.S b/arch/arm/mach-davinci/include/mach/debug-macro.S index 9f1befc5ac38..f8b7ea4f6235 100644 --- a/arch/arm/mach-davinci/include/mach/debug-macro.S +++ b/arch/arm/mach-davinci/include/mach/debug-macro.S @@ -24,6 +24,9 @@ #define UART_SHIFT 2 +#define davinci_uart_v2p(x) ((x) - PAGE_OFFSET + PLAT_PHYS_OFFSET) +#define davinci_uart_p2v(x) ((x) - PLAT_PHYS_OFFSET + PAGE_OFFSET) + .pushsection .data davinci_uart_phys: .word 0 davinci_uart_virt: .word 0 @@ -34,7 +37,7 @@ davinci_uart_virt: .word 0 /* Use davinci_uart_phys/virt if already configured */ 10: mrc p15, 0, \rp, c1, c0 tst \rp, #1 @ MMU enabled? - ldreq \rp, =__virt_to_phys(davinci_uart_phys) + ldreq \rp, =davinci_uart_v2p(davinci_uart_phys) ldrne \rp, =davinci_uart_phys add \rv, \rp, #4 @ davinci_uart_virt ldr \rp, [\rp, #0] @@ -48,18 +51,18 @@ davinci_uart_virt: .word 0 tst \rp, #1 @ MMU enabled? /* Copy uart phys address from decompressor uart info */ - ldreq \rv, =__virt_to_phys(davinci_uart_phys) + ldreq \rv, =davinci_uart_v2p(davinci_uart_phys) ldrne \rv, =davinci_uart_phys ldreq \rp, =DAVINCI_UART_INFO - ldrne \rp, =__phys_to_virt(DAVINCI_UART_INFO) + ldrne \rp, =davinci_uart_p2v(DAVINCI_UART_INFO) ldr \rp, [\rp, #0] str \rp, [\rv] /* Copy uart virt address from decompressor uart info */ - ldreq \rv, =__virt_to_phys(davinci_uart_virt) + ldreq \rv, =davinci_uart_v2p(davinci_uart_virt) ldrne \rv, =davinci_uart_virt ldreq \rp, =DAVINCI_UART_INFO - ldrne \rp, =__phys_to_virt(DAVINCI_UART_INFO) + ldrne \rp, =davinci_uart_p2v(DAVINCI_UART_INFO) ldr \rp, [\rp, #4] str \rp, [\rv] diff --git a/arch/arm/mach-davinci/include/mach/hardware.h b/arch/arm/mach-davinci/include/mach/hardware.h index c45ba1f62a11..414e0b93e741 100644 --- a/arch/arm/mach-davinci/include/mach/hardware.h +++ b/arch/arm/mach-davinci/include/mach/hardware.h @@ -21,9 +21,6 @@ */ #define DAVINCI_SYSTEM_MODULE_BASE 0x01C40000 -/* System control register offsets */ -#define DM64XX_VDD3P3V_PWDN 0x48 - /* * I/O mapping */ diff --git a/arch/arm/mach-davinci/include/mach/memory.h b/arch/arm/mach-davinci/include/mach/memory.h index 78822723f382..491249ef209c 100644 --- a/arch/arm/mach-davinci/include/mach/memory.h +++ b/arch/arm/mach-davinci/include/mach/memory.h @@ -41,27 +41,11 @@ */ #define CONSISTENT_DMA_SIZE (14<<20) -#ifndef __ASSEMBLY__ /* * Restrict DMA-able region to workaround silicon bug. The bug * restricts buffers available for DMA to video hardware to be * below 128M */ -static inline void -__arch_adjust_zones(unsigned long *size, unsigned long *holes) -{ - unsigned int sz = (128<<20) >> PAGE_SHIFT; - - size[1] = size[0] - sz; - size[0] = sz; -} - -#define arch_adjust_zones(zone_size, holes) \ - if ((meminfo.bank[0].size >> 20) > 128) __arch_adjust_zones(zone_size, holes) - -#define ISA_DMA_THRESHOLD (PHYS_OFFSET + (128<<20) - 1) -#define MAX_DMA_ADDRESS (PAGE_OFFSET + (128<<20)) - -#endif +#define ARM_DMA_ZONE_SIZE SZ_128M #endif /* __ASM_ARCH_MEMORY_H */ diff --git a/arch/arm/mach-davinci/include/mach/serial.h b/arch/arm/mach-davinci/include/mach/serial.h index 8051110b8ac3..c9e6ce185a66 100644 --- a/arch/arm/mach-davinci/include/mach/serial.h +++ b/arch/arm/mach-davinci/include/mach/serial.h @@ -22,7 +22,7 @@ * * This area sits just below the page tables (see arch/arm/kernel/head.S). */ -#define DAVINCI_UART_INFO (PHYS_OFFSET + 0x3ff8) +#define DAVINCI_UART_INFO (PLAT_PHYS_OFFSET + 0x3ff8) #define DAVINCI_UART0_BASE (IO_PHYS + 0x20000) #define DAVINCI_UART1_BASE (IO_PHYS + 0x20400) diff --git a/arch/arm/mach-davinci/include/mach/uncompress.h b/arch/arm/mach-davinci/include/mach/uncompress.h index 47723e8d75a4..78d80683cdc2 100644 --- a/arch/arm/mach-davinci/include/mach/uncompress.h +++ b/arch/arm/mach-davinci/include/mach/uncompress.h @@ -25,8 +25,7 @@ #include -static u32 *uart; -static u32 *uart_info = (u32 *)(DAVINCI_UART_INFO); +u32 *uart; /* PORT_16C550A, in polled non-fifo mode */ static void putc(char c) @@ -44,6 +43,8 @@ static inline void flush(void) static inline void set_uart_info(u32 phys, void * __iomem virt) { + u32 *uart_info = (u32 *)(DAVINCI_UART_INFO); + uart = (u32 *)phys; uart_info[0] = phys; uart_info[1] = (u32)virt; diff --git a/arch/arm/mach-davinci/irq.c b/arch/arm/mach-davinci/irq.c index e6269a6e0014..bfe68ec4e1a6 100644 --- a/arch/arm/mach-davinci/irq.c +++ b/arch/arm/mach-davinci/irq.c @@ -29,8 +29,6 @@ #include #include -#define IRQ_BIT(irq) ((irq) & 0x1f) - #define FIQ_REG0_OFFSET 0x0000 #define FIQ_REG1_OFFSET 0x0004 #define IRQ_REG0_OFFSET 0x0008 @@ -42,78 +40,33 @@ #define IRQ_INTPRI0_REG_OFFSET 0x0030 #define IRQ_INTPRI7_REG_OFFSET 0x004C -static inline unsigned int davinci_irq_readl(int offset) -{ - return __raw_readl(davinci_intc_base + offset); -} - static inline void davinci_irq_writel(unsigned long value, int offset) { __raw_writel(value, davinci_intc_base + offset); } -/* Disable interrupt */ -static void davinci_mask_irq(struct irq_data *d) +static __init void +davinci_alloc_gc(void __iomem *base, unsigned int irq_start, unsigned int num) { - unsigned int mask; - u32 l; + struct irq_chip_generic *gc; + struct irq_chip_type *ct; - mask = 1 << IRQ_BIT(d->irq); + gc = irq_alloc_generic_chip("AINTC", 1, irq_start, base, handle_edge_irq); + ct = gc->chip_types; + ct->chip.irq_ack = irq_gc_ack; + ct->chip.irq_mask = irq_gc_mask_clr_bit; + ct->chip.irq_unmask = irq_gc_mask_set_bit; - if (d->irq > 31) { - l = davinci_irq_readl(IRQ_ENT_REG1_OFFSET); - l &= ~mask; - davinci_irq_writel(l, IRQ_ENT_REG1_OFFSET); - } else { - l = davinci_irq_readl(IRQ_ENT_REG0_OFFSET); - l &= ~mask; - davinci_irq_writel(l, IRQ_ENT_REG0_OFFSET); - } + ct->regs.ack = IRQ_REG0_OFFSET; + ct->regs.mask = IRQ_ENT_REG0_OFFSET; + irq_setup_generic_chip(gc, IRQ_MSK(num), IRQ_GC_INIT_MASK_CACHE, + IRQ_NOREQUEST | IRQ_NOPROBE, 0); } -/* Enable interrupt */ -static void davinci_unmask_irq(struct irq_data *d) -{ - unsigned int mask; - u32 l; - - mask = 1 << IRQ_BIT(d->irq); - - if (d->irq > 31) { - l = davinci_irq_readl(IRQ_ENT_REG1_OFFSET); - l |= mask; - davinci_irq_writel(l, IRQ_ENT_REG1_OFFSET); - } else { - l = davinci_irq_readl(IRQ_ENT_REG0_OFFSET); - l |= mask; - davinci_irq_writel(l, IRQ_ENT_REG0_OFFSET); - } -} - -/* EOI interrupt */ -static void davinci_ack_irq(struct irq_data *d) -{ - unsigned int mask; - - mask = 1 << IRQ_BIT(d->irq); - - if (d->irq > 31) - davinci_irq_writel(mask, IRQ_REG1_OFFSET); - else - davinci_irq_writel(mask, IRQ_REG0_OFFSET); -} - -static struct irq_chip davinci_irq_chip_0 = { - .name = "AINTC", - .irq_ack = davinci_ack_irq, - .irq_mask = davinci_mask_irq, - .irq_unmask = davinci_unmask_irq, -}; - /* ARM Interrupt Controller Initialization */ void __init davinci_irq_init(void) { - unsigned i; + unsigned i, j; const u8 *davinci_def_priorities = davinci_soc_info.intc_irq_prios; davinci_intc_type = DAVINCI_INTC_TYPE_AINTC; @@ -144,7 +97,6 @@ void __init davinci_irq_init(void) davinci_irq_writel(~0x0, IRQ_REG1_OFFSET); for (i = IRQ_INTPRI0_REG_OFFSET; i <= IRQ_INTPRI7_REG_OFFSET; i += 4) { - unsigned j; u32 pri; for (j = 0, pri = 0; j < 32; j += 4, davinci_def_priorities++) @@ -152,13 +104,8 @@ void __init davinci_irq_init(void) davinci_irq_writel(pri, i); } - /* set up genirq dispatch for ARM INTC */ - for (i = 0; i < davinci_soc_info.intc_irq_num; i++) { - irq_set_chip(i, &davinci_irq_chip_0); - set_irq_flags(i, IRQF_VALID | IRQF_PROBE); - if (i != IRQ_TINT1_TINT34) - irq_set_handler(i, handle_edge_irq); - else - irq_set_handler(i, handle_level_irq); - } + for (i = 0, j = 0; i < davinci_soc_info.intc_irq_num; i += 32, j += 0x04) + davinci_alloc_gc(davinci_intc_base + j, i, 32); + + irq_set_handler(IRQ_TINT1_TINT34, handle_level_irq); } diff --git a/arch/arm/mach-dove/common.c b/arch/arm/mach-dove/common.c index e06a88f1f81d..5ed51b84c1b2 100644 --- a/arch/arm/mach-dove/common.c +++ b/arch/arm/mach-dove/common.c @@ -16,10 +16,8 @@ #include #include #include -#include -#include #include -#include +#include #include #include #include @@ -32,11 +30,12 @@ #include #include #include -#include -#include #include +#include #include "common.h" +static int get_tclk(void); + /***************************************************************************** * I/O Address Mapping ****************************************************************************/ @@ -69,464 +68,107 @@ void __init dove_map_io(void) iotable_init(dove_io_desc, ARRAY_SIZE(dove_io_desc)); } -/***************************************************************************** - * EHCI - ****************************************************************************/ -static struct orion_ehci_data dove_ehci_data = { - .dram = &dove_mbus_dram_info, - .phy_version = EHCI_PHY_NA, -}; - -static u64 ehci_dmamask = DMA_BIT_MASK(32); - /***************************************************************************** * EHCI0 ****************************************************************************/ -static struct resource dove_ehci0_resources[] = { - { - .start = DOVE_USB0_PHYS_BASE, - .end = DOVE_USB0_PHYS_BASE + SZ_4K - 1, - .flags = IORESOURCE_MEM, - }, { - .start = IRQ_DOVE_USB0, - .end = IRQ_DOVE_USB0, - .flags = IORESOURCE_IRQ, - }, -}; - -static struct platform_device dove_ehci0 = { - .name = "orion-ehci", - .id = 0, - .dev = { - .dma_mask = &ehci_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(32), - .platform_data = &dove_ehci_data, - }, - .resource = dove_ehci0_resources, - .num_resources = ARRAY_SIZE(dove_ehci0_resources), -}; - void __init dove_ehci0_init(void) { - platform_device_register(&dove_ehci0); + orion_ehci_init(&dove_mbus_dram_info, + DOVE_USB0_PHYS_BASE, IRQ_DOVE_USB0); } /***************************************************************************** * EHCI1 ****************************************************************************/ -static struct resource dove_ehci1_resources[] = { - { - .start = DOVE_USB1_PHYS_BASE, - .end = DOVE_USB1_PHYS_BASE + SZ_4K - 1, - .flags = IORESOURCE_MEM, - }, { - .start = IRQ_DOVE_USB1, - .end = IRQ_DOVE_USB1, - .flags = IORESOURCE_IRQ, - }, -}; - -static struct platform_device dove_ehci1 = { - .name = "orion-ehci", - .id = 1, - .dev = { - .dma_mask = &ehci_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(32), - .platform_data = &dove_ehci_data, - }, - .resource = dove_ehci1_resources, - .num_resources = ARRAY_SIZE(dove_ehci1_resources), -}; - void __init dove_ehci1_init(void) { - platform_device_register(&dove_ehci1); + orion_ehci_1_init(&dove_mbus_dram_info, + DOVE_USB1_PHYS_BASE, IRQ_DOVE_USB1); } /***************************************************************************** * GE00 ****************************************************************************/ -struct mv643xx_eth_shared_platform_data dove_ge00_shared_data = { - .t_clk = 0, - .dram = &dove_mbus_dram_info, -}; - -static struct resource dove_ge00_shared_resources[] = { - { - .name = "ge00 base", - .start = DOVE_GE00_PHYS_BASE + 0x2000, - .end = DOVE_GE00_PHYS_BASE + SZ_16K - 1, - .flags = IORESOURCE_MEM, - }, -}; - -static struct platform_device dove_ge00_shared = { - .name = MV643XX_ETH_SHARED_NAME, - .id = 0, - .dev = { - .platform_data = &dove_ge00_shared_data, - }, - .num_resources = 1, - .resource = dove_ge00_shared_resources, -}; - -static struct resource dove_ge00_resources[] = { - { - .name = "ge00 irq", - .start = IRQ_DOVE_GE00_SUM, - .end = IRQ_DOVE_GE00_SUM, - .flags = IORESOURCE_IRQ, - }, -}; - -static struct platform_device dove_ge00 = { - .name = MV643XX_ETH_NAME, - .id = 0, - .num_resources = 1, - .resource = dove_ge00_resources, - .dev = { - .coherent_dma_mask = 0xffffffff, - }, -}; - void __init dove_ge00_init(struct mv643xx_eth_platform_data *eth_data) { - eth_data->shared = &dove_ge00_shared; - dove_ge00.dev.platform_data = eth_data; - - platform_device_register(&dove_ge00_shared); - platform_device_register(&dove_ge00); + orion_ge00_init(eth_data, &dove_mbus_dram_info, + DOVE_GE00_PHYS_BASE, IRQ_DOVE_GE00_SUM, + 0, get_tclk()); } /***************************************************************************** * SoC RTC ****************************************************************************/ -static struct resource dove_rtc_resource[] = { - { - .start = DOVE_RTC_PHYS_BASE, - .end = DOVE_RTC_PHYS_BASE + 32 - 1, - .flags = IORESOURCE_MEM, - }, { - .start = IRQ_DOVE_RTC, - .flags = IORESOURCE_IRQ, - } -}; - void __init dove_rtc_init(void) { - platform_device_register_simple("rtc-mv", -1, dove_rtc_resource, 2); + orion_rtc_init(DOVE_RTC_PHYS_BASE, IRQ_DOVE_RTC); } /***************************************************************************** * SATA ****************************************************************************/ -static struct resource dove_sata_resources[] = { - { - .name = "sata base", - .start = DOVE_SATA_PHYS_BASE, - .end = DOVE_SATA_PHYS_BASE + 0x5000 - 1, - .flags = IORESOURCE_MEM, - }, { - .name = "sata irq", - .start = IRQ_DOVE_SATA, - .end = IRQ_DOVE_SATA, - .flags = IORESOURCE_IRQ, - }, -}; - -static struct platform_device dove_sata = { - .name = "sata_mv", - .id = 0, - .dev = { - .coherent_dma_mask = DMA_BIT_MASK(32), - }, - .num_resources = ARRAY_SIZE(dove_sata_resources), - .resource = dove_sata_resources, -}; - void __init dove_sata_init(struct mv_sata_platform_data *sata_data) { - sata_data->dram = &dove_mbus_dram_info; - dove_sata.dev.platform_data = sata_data; - platform_device_register(&dove_sata); + orion_sata_init(sata_data, &dove_mbus_dram_info, + DOVE_SATA_PHYS_BASE, IRQ_DOVE_SATA); + } /***************************************************************************** * UART0 ****************************************************************************/ -static struct plat_serial8250_port dove_uart0_data[] = { - { - .mapbase = DOVE_UART0_PHYS_BASE, - .membase = (char *)DOVE_UART0_VIRT_BASE, - .irq = IRQ_DOVE_UART_0, - .flags = UPF_SKIP_TEST | UPF_BOOT_AUTOCONF, - .iotype = UPIO_MEM, - .regshift = 2, - .uartclk = 0, - }, { - }, -}; - -static struct resource dove_uart0_resources[] = { - { - .start = DOVE_UART0_PHYS_BASE, - .end = DOVE_UART0_PHYS_BASE + SZ_256 - 1, - .flags = IORESOURCE_MEM, - }, { - .start = IRQ_DOVE_UART_0, - .end = IRQ_DOVE_UART_0, - .flags = IORESOURCE_IRQ, - }, -}; - -static struct platform_device dove_uart0 = { - .name = "serial8250", - .id = 0, - .dev = { - .platform_data = dove_uart0_data, - }, - .resource = dove_uart0_resources, - .num_resources = ARRAY_SIZE(dove_uart0_resources), -}; - void __init dove_uart0_init(void) { - platform_device_register(&dove_uart0); + orion_uart0_init(DOVE_UART0_VIRT_BASE, DOVE_UART0_PHYS_BASE, + IRQ_DOVE_UART_0, get_tclk()); } /***************************************************************************** * UART1 ****************************************************************************/ -static struct plat_serial8250_port dove_uart1_data[] = { - { - .mapbase = DOVE_UART1_PHYS_BASE, - .membase = (char *)DOVE_UART1_VIRT_BASE, - .irq = IRQ_DOVE_UART_1, - .flags = UPF_SKIP_TEST | UPF_BOOT_AUTOCONF, - .iotype = UPIO_MEM, - .regshift = 2, - .uartclk = 0, - }, { - }, -}; - -static struct resource dove_uart1_resources[] = { - { - .start = DOVE_UART1_PHYS_BASE, - .end = DOVE_UART1_PHYS_BASE + SZ_256 - 1, - .flags = IORESOURCE_MEM, - }, { - .start = IRQ_DOVE_UART_1, - .end = IRQ_DOVE_UART_1, - .flags = IORESOURCE_IRQ, - }, -}; - -static struct platform_device dove_uart1 = { - .name = "serial8250", - .id = 1, - .dev = { - .platform_data = dove_uart1_data, - }, - .resource = dove_uart1_resources, - .num_resources = ARRAY_SIZE(dove_uart1_resources), -}; - void __init dove_uart1_init(void) { - platform_device_register(&dove_uart1); + orion_uart1_init(DOVE_UART1_VIRT_BASE, DOVE_UART1_PHYS_BASE, + IRQ_DOVE_UART_1, get_tclk()); } /***************************************************************************** * UART2 ****************************************************************************/ -static struct plat_serial8250_port dove_uart2_data[] = { - { - .mapbase = DOVE_UART2_PHYS_BASE, - .membase = (char *)DOVE_UART2_VIRT_BASE, - .irq = IRQ_DOVE_UART_2, - .flags = UPF_SKIP_TEST | UPF_BOOT_AUTOCONF, - .iotype = UPIO_MEM, - .regshift = 2, - .uartclk = 0, - }, { - }, -}; - -static struct resource dove_uart2_resources[] = { - { - .start = DOVE_UART2_PHYS_BASE, - .end = DOVE_UART2_PHYS_BASE + SZ_256 - 1, - .flags = IORESOURCE_MEM, - }, { - .start = IRQ_DOVE_UART_2, - .end = IRQ_DOVE_UART_2, - .flags = IORESOURCE_IRQ, - }, -}; - -static struct platform_device dove_uart2 = { - .name = "serial8250", - .id = 2, - .dev = { - .platform_data = dove_uart2_data, - }, - .resource = dove_uart2_resources, - .num_resources = ARRAY_SIZE(dove_uart2_resources), -}; - void __init dove_uart2_init(void) { - platform_device_register(&dove_uart2); + orion_uart2_init(DOVE_UART2_VIRT_BASE, DOVE_UART2_PHYS_BASE, + IRQ_DOVE_UART_2, get_tclk()); } /***************************************************************************** * UART3 ****************************************************************************/ -static struct plat_serial8250_port dove_uart3_data[] = { - { - .mapbase = DOVE_UART3_PHYS_BASE, - .membase = (char *)DOVE_UART3_VIRT_BASE, - .irq = IRQ_DOVE_UART_3, - .flags = UPF_SKIP_TEST | UPF_BOOT_AUTOCONF, - .iotype = UPIO_MEM, - .regshift = 2, - .uartclk = 0, - }, { - }, -}; - -static struct resource dove_uart3_resources[] = { - { - .start = DOVE_UART3_PHYS_BASE, - .end = DOVE_UART3_PHYS_BASE + SZ_256 - 1, - .flags = IORESOURCE_MEM, - }, { - .start = IRQ_DOVE_UART_3, - .end = IRQ_DOVE_UART_3, - .flags = IORESOURCE_IRQ, - }, -}; - -static struct platform_device dove_uart3 = { - .name = "serial8250", - .id = 3, - .dev = { - .platform_data = dove_uart3_data, - }, - .resource = dove_uart3_resources, - .num_resources = ARRAY_SIZE(dove_uart3_resources), -}; - void __init dove_uart3_init(void) { - platform_device_register(&dove_uart3); + orion_uart3_init(DOVE_UART3_VIRT_BASE, DOVE_UART3_PHYS_BASE, + IRQ_DOVE_UART_3, get_tclk()); } /***************************************************************************** - * SPI0 + * SPI ****************************************************************************/ -static struct orion_spi_info dove_spi0_data = { - .tclk = 0, -}; - -static struct resource dove_spi0_resources[] = { - { - .start = DOVE_SPI0_PHYS_BASE, - .end = DOVE_SPI0_PHYS_BASE + SZ_512 - 1, - .flags = IORESOURCE_MEM, - }, { - .start = IRQ_DOVE_SPI0, - .end = IRQ_DOVE_SPI0, - .flags = IORESOURCE_IRQ, - }, -}; - -static struct platform_device dove_spi0 = { - .name = "orion_spi", - .id = 0, - .resource = dove_spi0_resources, - .dev = { - .platform_data = &dove_spi0_data, - }, - .num_resources = ARRAY_SIZE(dove_spi0_resources), -}; - void __init dove_spi0_init(void) { - platform_device_register(&dove_spi0); + orion_spi_init(DOVE_SPI0_PHYS_BASE, get_tclk()); } -/***************************************************************************** - * SPI1 - ****************************************************************************/ -static struct orion_spi_info dove_spi1_data = { - .tclk = 0, -}; - -static struct resource dove_spi1_resources[] = { - { - .start = DOVE_SPI1_PHYS_BASE, - .end = DOVE_SPI1_PHYS_BASE + SZ_512 - 1, - .flags = IORESOURCE_MEM, - }, { - .start = IRQ_DOVE_SPI1, - .end = IRQ_DOVE_SPI1, - .flags = IORESOURCE_IRQ, - }, -}; - -static struct platform_device dove_spi1 = { - .name = "orion_spi", - .id = 1, - .resource = dove_spi1_resources, - .dev = { - .platform_data = &dove_spi1_data, - }, - .num_resources = ARRAY_SIZE(dove_spi1_resources), -}; - void __init dove_spi1_init(void) { - platform_device_register(&dove_spi1); + orion_spi_init(DOVE_SPI1_PHYS_BASE, get_tclk()); } /***************************************************************************** * I2C ****************************************************************************/ -static struct mv64xxx_i2c_pdata dove_i2c_data = { - .freq_m = 10, /* assumes 166 MHz TCLK gets 94.3kHz */ - .freq_n = 3, - .timeout = 1000, /* Default timeout of 1 second */ -}; - -static struct resource dove_i2c_resources[] = { - { - .name = "i2c base", - .start = DOVE_I2C_PHYS_BASE, - .end = DOVE_I2C_PHYS_BASE + 0x20 - 1, - .flags = IORESOURCE_MEM, - }, { - .name = "i2c irq", - .start = IRQ_DOVE_I2C, - .end = IRQ_DOVE_I2C, - .flags = IORESOURCE_IRQ, - }, -}; - -static struct platform_device dove_i2c = { - .name = MV64XXX_I2C_CTLR_NAME, - .id = 0, - .num_resources = ARRAY_SIZE(dove_i2c_resources), - .resource = dove_i2c_resources, - .dev = { - .platform_data = &dove_i2c_data, - }, -}; - void __init dove_i2c_init(void) { - platform_device_register(&dove_i2c); + orion_i2c_init(DOVE_I2C_PHYS_BASE, IRQ_DOVE_I2C, 10); } /***************************************************************************** @@ -553,209 +195,23 @@ struct sys_timer dove_timer = { .init = dove_timer_init, }; -/***************************************************************************** - * XOR - ****************************************************************************/ -static struct mv_xor_platform_shared_data dove_xor_shared_data = { - .dram = &dove_mbus_dram_info, -}; - /***************************************************************************** * XOR 0 ****************************************************************************/ -static u64 dove_xor0_dmamask = DMA_BIT_MASK(32); - -static struct resource dove_xor0_shared_resources[] = { - { - .name = "xor 0 low", - .start = DOVE_XOR0_PHYS_BASE, - .end = DOVE_XOR0_PHYS_BASE + 0xff, - .flags = IORESOURCE_MEM, - }, { - .name = "xor 0 high", - .start = DOVE_XOR0_HIGH_PHYS_BASE, - .end = DOVE_XOR0_HIGH_PHYS_BASE + 0xff, - .flags = IORESOURCE_MEM, - }, -}; - -static struct platform_device dove_xor0_shared = { - .name = MV_XOR_SHARED_NAME, - .id = 0, - .dev = { - .platform_data = &dove_xor_shared_data, - }, - .num_resources = ARRAY_SIZE(dove_xor0_shared_resources), - .resource = dove_xor0_shared_resources, -}; - -static struct resource dove_xor00_resources[] = { - [0] = { - .start = IRQ_DOVE_XOR_00, - .end = IRQ_DOVE_XOR_00, - .flags = IORESOURCE_IRQ, - }, -}; - -static struct mv_xor_platform_data dove_xor00_data = { - .shared = &dove_xor0_shared, - .hw_id = 0, - .pool_size = PAGE_SIZE, -}; - -static struct platform_device dove_xor00_channel = { - .name = MV_XOR_NAME, - .id = 0, - .num_resources = ARRAY_SIZE(dove_xor00_resources), - .resource = dove_xor00_resources, - .dev = { - .dma_mask = &dove_xor0_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(64), - .platform_data = &dove_xor00_data, - }, -}; - -static struct resource dove_xor01_resources[] = { - [0] = { - .start = IRQ_DOVE_XOR_01, - .end = IRQ_DOVE_XOR_01, - .flags = IORESOURCE_IRQ, - }, -}; - -static struct mv_xor_platform_data dove_xor01_data = { - .shared = &dove_xor0_shared, - .hw_id = 1, - .pool_size = PAGE_SIZE, -}; - -static struct platform_device dove_xor01_channel = { - .name = MV_XOR_NAME, - .id = 1, - .num_resources = ARRAY_SIZE(dove_xor01_resources), - .resource = dove_xor01_resources, - .dev = { - .dma_mask = &dove_xor0_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(64), - .platform_data = &dove_xor01_data, - }, -}; - void __init dove_xor0_init(void) { - platform_device_register(&dove_xor0_shared); - - /* - * two engines can't do memset simultaneously, this limitation - * satisfied by removing memset support from one of the engines. - */ - dma_cap_set(DMA_MEMCPY, dove_xor00_data.cap_mask); - dma_cap_set(DMA_XOR, dove_xor00_data.cap_mask); - platform_device_register(&dove_xor00_channel); - - dma_cap_set(DMA_MEMCPY, dove_xor01_data.cap_mask); - dma_cap_set(DMA_MEMSET, dove_xor01_data.cap_mask); - dma_cap_set(DMA_XOR, dove_xor01_data.cap_mask); - platform_device_register(&dove_xor01_channel); + orion_xor0_init(&dove_mbus_dram_info, + DOVE_XOR0_PHYS_BASE, DOVE_XOR0_HIGH_PHYS_BASE, + IRQ_DOVE_XOR_00, IRQ_DOVE_XOR_01); } /***************************************************************************** * XOR 1 ****************************************************************************/ -static u64 dove_xor1_dmamask = DMA_BIT_MASK(32); - -static struct resource dove_xor1_shared_resources[] = { - { - .name = "xor 0 low", - .start = DOVE_XOR1_PHYS_BASE, - .end = DOVE_XOR1_PHYS_BASE + 0xff, - .flags = IORESOURCE_MEM, - }, { - .name = "xor 0 high", - .start = DOVE_XOR1_HIGH_PHYS_BASE, - .end = DOVE_XOR1_HIGH_PHYS_BASE + 0xff, - .flags = IORESOURCE_MEM, - }, -}; - -static struct platform_device dove_xor1_shared = { - .name = MV_XOR_SHARED_NAME, - .id = 1, - .dev = { - .platform_data = &dove_xor_shared_data, - }, - .num_resources = ARRAY_SIZE(dove_xor1_shared_resources), - .resource = dove_xor1_shared_resources, -}; - -static struct resource dove_xor10_resources[] = { - [0] = { - .start = IRQ_DOVE_XOR_10, - .end = IRQ_DOVE_XOR_10, - .flags = IORESOURCE_IRQ, - }, -}; - -static struct mv_xor_platform_data dove_xor10_data = { - .shared = &dove_xor1_shared, - .hw_id = 0, - .pool_size = PAGE_SIZE, -}; - -static struct platform_device dove_xor10_channel = { - .name = MV_XOR_NAME, - .id = 2, - .num_resources = ARRAY_SIZE(dove_xor10_resources), - .resource = dove_xor10_resources, - .dev = { - .dma_mask = &dove_xor1_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(64), - .platform_data = &dove_xor10_data, - }, -}; - -static struct resource dove_xor11_resources[] = { - [0] = { - .start = IRQ_DOVE_XOR_11, - .end = IRQ_DOVE_XOR_11, - .flags = IORESOURCE_IRQ, - }, -}; - -static struct mv_xor_platform_data dove_xor11_data = { - .shared = &dove_xor1_shared, - .hw_id = 1, - .pool_size = PAGE_SIZE, -}; - -static struct platform_device dove_xor11_channel = { - .name = MV_XOR_NAME, - .id = 3, - .num_resources = ARRAY_SIZE(dove_xor11_resources), - .resource = dove_xor11_resources, - .dev = { - .dma_mask = &dove_xor1_dmamask, - .coherent_dma_mask = DMA_BIT_MASK(64), - .platform_data = &dove_xor11_data, - }, -}; - void __init dove_xor1_init(void) { - platform_device_register(&dove_xor1_shared); - - /* - * two engines can't do memset simultaneously, this limitation - * satisfied by removing memset support from one of the engines. - */ - dma_cap_set(DMA_MEMCPY, dove_xor10_data.cap_mask); - dma_cap_set(DMA_XOR, dove_xor10_data.cap_mask); - platform_device_register(&dove_xor10_channel); - - dma_cap_set(DMA_MEMCPY, dove_xor11_data.cap_mask); - dma_cap_set(DMA_MEMSET, dove_xor11_data.cap_mask); - dma_cap_set(DMA_XOR, dove_xor11_data.cap_mask); - platform_device_register(&dove_xor11_channel); + orion_xor1_init(DOVE_XOR1_PHYS_BASE, DOVE_XOR1_HIGH_PHYS_BASE, + IRQ_DOVE_XOR_10, IRQ_DOVE_XOR_11); } /***************************************************************************** @@ -833,14 +289,6 @@ void __init dove_init(void) #endif dove_setup_cpu_mbus(); - dove_ge00_shared_data.t_clk = tclk; - dove_uart0_data[0].uartclk = tclk; - dove_uart1_data[0].uartclk = tclk; - dove_uart2_data[0].uartclk = tclk; - dove_uart3_data[0].uartclk = tclk; - dove_spi0_data.tclk = tclk; - dove_spi1_data.tclk = tclk; - /* internal devices that every board has */ dove_rtc_init(); dove_xor0_init(); diff --git a/arch/arm/mach-dove/mpp.c b/arch/arm/mach-dove/mpp.c index c66c76346904..51e0e411c9cb 100644 --- a/arch/arm/mach-dove/mpp.c +++ b/arch/arm/mach-dove/mpp.c @@ -11,24 +11,17 @@ #include #include #include - +#include #include - #include "mpp.h" -#define MPP_NR_REGS 4 -#define MPP_CTRL(i) ((i) == 3 ? \ - DOVE_MPP_CTRL4_VIRT_BASE : \ - DOVE_MPP_VIRT_BASE + (i) * 4) -#define PMU_SIG_REGS 2 -#define PMU_SIG_CTRL(i) (DOVE_PMU_SIG_CTRL + (i) * 4) - struct dove_mpp_grp { int start; int end; }; -static struct dove_mpp_grp dove_mpp_grp[] = { +/* Map a group to a range of GPIO pins in that group */ +static const struct dove_mpp_grp dove_mpp_grp[] = { [MPP_24_39] = { .start = 24, .end = 39, @@ -38,8 +31,8 @@ static struct dove_mpp_grp dove_mpp_grp[] = { .end = 45, }, [MPP_46_51] = { - .start = 40, - .end = 45, + .start = 46, + .end = 51, }, [MPP_58_61] = { .start = 58, @@ -51,6 +44,8 @@ static struct dove_mpp_grp dove_mpp_grp[] = { }, }; +/* Enable gpio for a range of pins. mode should be a combination of + GPIO_OUTPUT_OK | GPIO_INPUT_OK */ static void dove_mpp_gpio_mode(int start, int end, int gpio_mode) { int i; @@ -59,24 +54,17 @@ static void dove_mpp_gpio_mode(int start, int end, int gpio_mode) orion_gpio_set_valid(i, gpio_mode); } +/* Dump all the extra MPP registers. The platform code will dump the + registers for pins 0-23. */ static void dove_mpp_dump_regs(void) { -#ifdef DEBUG - int i; + pr_debug("PMU_CTRL4_CTRL: %08x\n", + readl(DOVE_MPP_CTRL4_VIRT_BASE)); - pr_debug("MPP_CTRL regs:"); - for (i = 0; i < MPP_NR_REGS; i++) - printk(" %08x", readl(MPP_CTRL(i))); - printk("\n"); + pr_debug("PMU_MPP_GENERAL_CTRL: %08x\n", + readl(DOVE_PMU_MPP_GENERAL_CTRL)); - pr_debug("PMU_SIG_CTRL regs:"); - for (i = 0; i < PMU_SIG_REGS; i++) - printk(" %08x", readl(PMU_SIG_CTRL(i))); - printk("\n"); - - pr_debug("PMU_MPP_GENERAL_CTRL: %08x\n", readl(DOVE_PMU_MPP_GENERAL_CTRL)); pr_debug("MPP_GENERAL: %08x\n", readl(DOVE_MPP_GENERAL_VIRT_BASE)); -#endif } static void dove_mpp_cfg_nfc(int sel) @@ -92,7 +80,7 @@ static void dove_mpp_cfg_nfc(int sel) static void dove_mpp_cfg_au1(int sel) { - u32 mpp_ctrl4 = readl(DOVE_MPP_CTRL4_VIRT_BASE); + u32 mpp_ctrl4 = readl(DOVE_MPP_CTRL4_VIRT_BASE); u32 ssp_ctrl1 = readl(DOVE_SSP_CTRL_STATUS_1); u32 mpp_gen_ctrl = readl(DOVE_MPP_GENERAL_VIRT_BASE); u32 global_cfg_2 = readl(DOVE_GLOBAL_CONFIG_2); @@ -128,82 +116,46 @@ static void dove_mpp_cfg_au1(int sel) writel(global_cfg_2, DOVE_GLOBAL_CONFIG_2); } -static void dove_mpp_conf_grp(int num, int sel, u32 *mpp_ctrl) +/* Configure the group registers, enabling GPIO if sel indicates the + pin is to be used for GPIO */ +static void dove_mpp_conf_grp(unsigned int *mpp_grp_list) { - int start = dove_mpp_grp[num].start; - int end = dove_mpp_grp[num].end; - int gpio_mode = sel ? GPIO_OUTPUT_OK | GPIO_INPUT_OK : 0; + u32 mpp_ctrl4 = readl(DOVE_MPP_CTRL4_VIRT_BASE); + int gpio_mode; - *mpp_ctrl &= ~(0x1 << num); - *mpp_ctrl |= sel << num; + for ( ; *mpp_grp_list; mpp_grp_list++) { + unsigned int num = MPP_NUM(*mpp_grp_list); + unsigned int sel = MPP_SEL(*mpp_grp_list); - dove_mpp_gpio_mode(start, end, gpio_mode); -} - -void __init dove_mpp_conf(unsigned int *mpp_list) -{ - u32 mpp_ctrl[MPP_NR_REGS]; - u32 pmu_mpp_ctrl = 0; - u32 pmu_sig_ctrl[PMU_SIG_REGS]; - int i; - - for (i = 0; i < MPP_NR_REGS; i++) - mpp_ctrl[i] = readl(MPP_CTRL(i)); - - for (i = 0; i < PMU_SIG_REGS; i++) - pmu_sig_ctrl[i] = readl(PMU_SIG_CTRL(i)); - - pmu_mpp_ctrl = readl(DOVE_PMU_MPP_GENERAL_CTRL); - - dove_mpp_dump_regs(); - - for ( ; *mpp_list != MPP_END; mpp_list++) { - unsigned int num = MPP_NUM(*mpp_list); - unsigned int sel = MPP_SEL(*mpp_list); - int shift, gpio_mode; - - if (num > MPP_MAX) { - pr_err("dove: invalid MPP number (%u)\n", num); + if (num > MPP_GRP_MAX) { + pr_err("dove: invalid MPP GRP number (%u)\n", num); continue; } - if (*mpp_list & MPP_NFC_MASK) { - dove_mpp_cfg_nfc(sel); - continue; - } + mpp_ctrl4 &= ~(0x1 << num); + mpp_ctrl4 |= sel << num; - if (*mpp_list & MPP_AU1_MASK) { - dove_mpp_cfg_au1(sel); - continue; - } - - if (*mpp_list & MPP_GRP_MASK) { - dove_mpp_conf_grp(num, sel, &mpp_ctrl[3]); - continue; - } - - shift = (num & 7) << 2; - if (*mpp_list & MPP_PMU_MASK) { - pmu_mpp_ctrl |= (0x1 << num); - pmu_sig_ctrl[num / 8] &= ~(0xf << shift); - pmu_sig_ctrl[num / 8] |= 0xf << shift; - gpio_mode = 0; - } else { - mpp_ctrl[num / 8] &= ~(0xf << shift); - mpp_ctrl[num / 8] |= sel << shift; - gpio_mode = GPIO_OUTPUT_OK | GPIO_INPUT_OK; - } - - orion_gpio_set_valid(num, gpio_mode); + gpio_mode = sel ? GPIO_OUTPUT_OK | GPIO_INPUT_OK : 0; + dove_mpp_gpio_mode(dove_mpp_grp[num].start, + dove_mpp_grp[num].end, gpio_mode); } + writel(mpp_ctrl4, DOVE_MPP_CTRL4_VIRT_BASE); +} - for (i = 0; i < MPP_NR_REGS; i++) - writel(mpp_ctrl[i], MPP_CTRL(i)); +/* Configure the various MPP pins on Dove */ +void __init dove_mpp_conf(unsigned int *mpp_list, + unsigned int *mpp_grp_list, + unsigned int grp_au1_52_57, + unsigned int grp_nfc_64_71) +{ + dove_mpp_dump_regs(); - for (i = 0; i < PMU_SIG_REGS; i++) - writel(pmu_sig_ctrl[i], PMU_SIG_CTRL(i)); + /* Use platform code for pins 0-23 */ + orion_mpp_conf(mpp_list, 0, MPP_MAX, DOVE_MPP_VIRT_BASE); - writel(pmu_mpp_ctrl, DOVE_PMU_MPP_GENERAL_CTRL); + dove_mpp_conf_grp(mpp_grp_list); + dove_mpp_cfg_au1(grp_au1_52_57); + dove_mpp_cfg_nfc(grp_nfc_64_71); dove_mpp_dump_regs(); } diff --git a/arch/arm/mach-dove/mpp.h b/arch/arm/mach-dove/mpp.h index 2a43ce413b15..fbec7c52bfac 100644 --- a/arch/arm/mach-dove/mpp.h +++ b/arch/arm/mach-dove/mpp.h @@ -1,178 +1,150 @@ #ifndef __ARCH_DOVE_MPP_CODED_H #define __ARCH_DOVE_MPP_CODED_H -#define MPP(_num, _mode, _pmu, _grp, _au1, _nfc) ( \ -/* MPP/group number */ ((_num) & 0xff) | \ -/* MPP select value */ (((_mode) & 0xf) << 8) | \ -/* MPP PMU */ ((!!(_pmu)) << 12) | \ -/* group flag */ ((!!(_grp)) << 13) | \ -/* AU1 flag */ ((!!(_au1)) << 14) | \ -/* NFCE flag */ ((!!(_nfc)) << 15)) +#define MPP(_num, _sel, _in, _out) ( \ + /* MPP number */ ((_num) & 0xff) | \ + /* MPP select value */ (((_sel) & 0xf) << 8) | \ + /* may be input signal */ ((!!(_in)) << 12) | \ + /* may be output signal */ ((!!(_out)) << 13)) -#define MPP_MAX 71 +#define MPP0_GPIO0 MPP(0, 0x0, 1, 1) +#define MPP0_UA2_RTSn MPP(0, 0x2, 0, 0) +#define MPP0_SDIO0_CD MPP(0, 0x3, 0, 0) +#define MPP0_LCD0_PWM MPP(0, 0xf, 0, 0) -#define MPP_NUM(x) ((x) & 0xff) -#define MPP_SEL(x) (((x) >> 8) & 0xf) +#define MPP1_GPIO1 MPP(1, 0x0, 1, 1) +#define MPP1_UA2_CTSn MPP(1, 0x2, 0, 0) +#define MPP1_SDIO0_WP MPP(1, 0x3, 0, 0) +#define MPP1_LCD1_PWM MPP(1, 0xf, 0, 0) -#define MPP_PMU_MASK MPP(0, 0x0, 1, 0, 0, 0) -#define MPP_GRP_MASK MPP(0, 0x0, 0, 1, 0, 0) -#define MPP_AU1_MASK MPP(0, 0x0, 0, 0, 1, 0) -#define MPP_NFC_MASK MPP(0, 0x0, 0, 0, 0, 1) +#define MPP2_GPIO2 MPP(2, 0x0, 1, 1) +#define MPP2_SATA_PRESENT MPP(2, 0x1, 0, 0) +#define MPP2_UA2_TXD MPP(2, 0x2, 0, 0) +#define MPP2_SDIO0_BUS_POWER MPP(2, 0x3, 0, 0) +#define MPP2_UA_RTSn1 MPP(2, 0x4, 0, 0) -#define MPP_END MPP(0xff, 0xf, 1, 1, 1, 1) +#define MPP3_GPIO3 MPP(3, 0x0, 1, 1) +#define MPP3_SATA_ACT MPP(3, 0x1, 0, 0) +#define MPP3_UA2_RXD MPP(3, 0x2, 0, 0) +#define MPP3_SDIO0_LED_CTRL MPP(3, 0x3, 0, 0) +#define MPP3_UA_CTSn1 MPP(3, 0x4, 0, 0) +#define MPP3_SPI_LCD_CS1 MPP(3, 0xf, 0, 0) -#define MPP_PMU_DRIVE_0 0x1 -#define MPP_PMU_DRIVE_1 0x2 -#define MPP_PMU_SDI 0x3 -#define MPP_PMU_CPU_PWRDWN 0x4 -#define MPP_PMU_STBY_PWRDWN 0x5 -#define MPP_PMU_CORE_PWR_GOOD 0x8 -#define MPP_PMU_BAT_FAULT 0xa -#define MPP_PMU_EXT0_WU 0xb -#define MPP_PMU_EXT1_WU 0xc -#define MPP_PMU_EXT2_WU 0xd -#define MPP_PMU_BLINK 0xe -#define MPP_PMU(_num, _mode) MPP((_num), MPP_PMU_##_mode, 1, 0, 0, 0) +#define MPP4_GPIO4 MPP(4, 0x0, 1, 1) +#define MPP4_UA3_RTSn MPP(4, 0x2, 0, 0) +#define MPP4_SDIO1_CD MPP(4, 0x3, 0, 0) +#define MPP4_SPI_1_MISO MPP(4, 0x4, 0, 0) -#define MPP_PIN(_num, _mode) MPP((_num), (_mode), 0, 0, 0, 0) -#define MPP_GRP(_grp, _mode) MPP((_grp), (_mode), 0, 1, 0, 0) -#define MPP_GRP_AU1(_mode) MPP(0, (_mode), 0, 0, 1, 0) -#define MPP_GRP_NFC(_mode) MPP(0, (_mode), 0, 0, 0, 1) +#define MPP5_GPIO5 MPP(5, 0x0, 1, 1) +#define MPP5_UA3_CTSn MPP(5, 0x2, 0, 0) +#define MPP5_SDIO1_WP MPP(5, 0x3, 0, 0) +#define MPP5_SPI_1_CS MPP(5, 0x4, 0, 0) -#define MPP0_GPIO0 MPP_PIN(0, 0x0) -#define MPP0_UA2_RTSn MPP_PIN(0, 0x2) -#define MPP0_SDIO0_CD MPP_PIN(0, 0x3) -#define MPP0_LCD0_PWM MPP_PIN(0, 0xf) +#define MPP6_GPIO6 MPP(6, 0x0, 1, 1) +#define MPP6_UA3_TXD MPP(6, 0x2, 0, 0) +#define MPP6_SDIO1_BUS_POWER MPP(6, 0x3, 0, 0) +#define MPP6_SPI_1_MOSI MPP(6, 0x4, 0, 0) -#define MPP1_GPIO1 MPP_PIN(1, 0x0) -#define MPP1_UA2_CTSn MPP_PIN(1, 0x2) -#define MPP1_SDIO0_WP MPP_PIN(1, 0x3) -#define MPP1_LCD1_PWM MPP_PIN(1, 0xf) +#define MPP7_GPIO7 MPP(7, 0x0, 1, 1) +#define MPP7_UA3_RXD MPP(7, 0x2, 0, 0) +#define MPP7_SDIO1_LED_CTRL MPP(7, 0x3, 0, 0) +#define MPP7_SPI_1_SCK MPP(7, 0x4, 0, 0) -#define MPP2_GPIO2 MPP_PIN(2, 0x0) -#define MPP2_SATA_PRESENT MPP_PIN(2, 0x1) -#define MPP2_UA2_TXD MPP_PIN(2, 0x2) -#define MPP2_SDIO0_BUS_POWER MPP_PIN(2, 0x3) -#define MPP2_UA_RTSn1 MPP_PIN(2, 0x4) +#define MPP8_GPIO8 MPP(8, 0x0, 1, 1) +#define MPP8_WD_RST_OUT MPP(8, 0x1, 0, 0) -#define MPP3_GPIO3 MPP_PIN(3, 0x0) -#define MPP3_SATA_ACT MPP_PIN(3, 0x1) -#define MPP3_UA2_RXD MPP_PIN(3, 0x2) -#define MPP3_SDIO0_LED_CTRL MPP_PIN(3, 0x3) -#define MPP3_UA_CTSn1 MPP_PIN(3, 0x4) -#define MPP3_SPI_LCD_CS1 MPP_PIN(3, 0xf) +#define MPP9_GPIO9 MPP(9, 0x0, 1, 1) +#define MPP9_PEX1_CLKREQn MPP(9, 0x5, 0, 0) -#define MPP4_GPIO4 MPP_PIN(4, 0x0) -#define MPP4_UA3_RTSn MPP_PIN(4, 0x2) -#define MPP4_SDIO1_CD MPP_PIN(4, 0x3) -#define MPP4_SPI_1_MISO MPP_PIN(4, 0x4) +#define MPP10_GPIO10 MPP(10, 0x0, 1, 1) +#define MPP10_SSP_SCLK MPP(10, 0x5, 0, 0) -#define MPP5_GPIO5 MPP_PIN(5, 0x0) -#define MPP5_UA3_CTSn MPP_PIN(5, 0x2) -#define MPP5_SDIO1_WP MPP_PIN(5, 0x3) -#define MPP5_SPI_1_CS MPP_PIN(5, 0x4) +#define MPP11_GPIO11 MPP(11, 0x0, 1, 1) +#define MPP11_SATA_PRESENT MPP(11, 0x1, 0, 0) +#define MPP11_SATA_ACT MPP(11, 0x2, 0, 0) +#define MPP11_SDIO0_LED_CTRL MPP(11, 0x3, 0, 0) +#define MPP11_SDIO1_LED_CTRL MPP(11, 0x4, 0, 0) +#define MPP11_PEX0_CLKREQn MPP(11, 0x5, 0, 0) -#define MPP6_GPIO6 MPP_PIN(6, 0x0) -#define MPP6_UA3_TXD MPP_PIN(6, 0x2) -#define MPP6_SDIO1_BUS_POWER MPP_PIN(6, 0x3) -#define MPP6_SPI_1_MOSI MPP_PIN(6, 0x4) +#define MPP12_GPIO12 MPP(12, 0x0, 1, 1) +#define MPP12_SATA_ACT MPP(12, 0x1, 0, 0) +#define MPP12_UA2_RTSn MPP(12, 0x2, 0, 0) +#define MPP12_AD0_I2S_EXT_MCLK MPP(12, 0x3, 0, 0) +#define MPP12_SDIO1_CD MPP(12, 0x4, 0, 0) -#define MPP7_GPIO7 MPP_PIN(7, 0x0) -#define MPP7_UA3_RXD MPP_PIN(7, 0x2) -#define MPP7_SDIO1_LED_CTRL MPP_PIN(7, 0x3) -#define MPP7_SPI_1_SCK MPP_PIN(7, 0x4) +#define MPP13_GPIO13 MPP(13, 0x0, 1, 1) +#define MPP13_UA2_CTSn MPP(13, 0x2, 0, 0) +#define MPP13_AD1_I2S_EXT_MCLK MPP(13, 0x3, 0, 0) +#define MPP13_SDIO1WP MPP(13, 0x4, 0, 0) +#define MPP13_SSP_EXTCLK MPP(13, 0x5, 0, 0) -#define MPP8_GPIO8 MPP_PIN(8, 0x0) -#define MPP8_WD_RST_OUT MPP_PIN(8, 0x1) +#define MPP14_GPIO14 MPP(14, 0x0, 1, 1) +#define MPP14_UA2_TXD MPP(14, 0x2, 0, 0) +#define MPP14_SDIO1_BUS_POWER MPP(14, 0x4, 0, 0) +#define MPP14_SSP_RXD MPP(14, 0x5, 0, 0) -#define MPP9_GPIO9 MPP_PIN(9, 0x0) -#define MPP9_PEX1_CLKREQn MPP_PIN(9, 0x5) +#define MPP15_GPIO15 MPP(15, 0x0, 1, 1) +#define MPP15_UA2_RXD MPP(15, 0x2, 0, 0) +#define MPP15_SDIO1_LED_CTRL MPP(15, 0x4, 0, 0) +#define MPP15_SSP_SFRM MPP(15, 0x5, 0, 0) -#define MPP10_GPIO10 MPP_PIN(10, 0x0) -#define MPP10_SSP_SCLK MPP_PIN(10, 0x5) +#define MPP16_GPIO16 MPP(16, 0x0, 1, 1) +#define MPP16_UA3_RTSn MPP(16, 0x2, 0, 0) +#define MPP16_SDIO0_CD MPP(16, 0x3, 0, 0) +#define MPP16_SPI_LCD_CS1 MPP(16, 0x4, 0, 0) +#define MPP16_AC97_SDATA_IN1 MPP(16, 0x5, 0, 0) -#define MPP11_GPIO11 MPP_PIN(11, 0x0) -#define MPP11_SATA_PRESENT MPP_PIN(11, 0x1) -#define MPP11_SATA_ACT MPP_PIN(11, 0x2) -#define MPP11_SDIO0_LED_CTRL MPP_PIN(11, 0x3) -#define MPP11_SDIO1_LED_CTRL MPP_PIN(11, 0x4) -#define MPP11_PEX0_CLKREQn MPP_PIN(11, 0x5) +#define MPP17_GPIO17 MPP(17, 0x0, 1, 1) +#define MPP17_AC97_SYSCLK_OUT MPP(17, 0x1, 0, 0) +#define MPP17_UA3_CTSn MPP(17, 0x2, 0, 0) +#define MPP17_SDIO0_WP MPP(17, 0x3, 0, 0) +#define MPP17_TW_SDA2 MPP(17, 0x4, 0, 0) +#define MPP17_AC97_SDATA_IN2 MPP(17, 0x5, 0, 0) -#define MPP12_GPIO12 MPP_PIN(12, 0x0) -#define MPP12_SATA_ACT MPP_PIN(12, 0x1) -#define MPP12_UA2_RTSn MPP_PIN(12, 0x2) -#define MPP12_AD0_I2S_EXT_MCLK MPP_PIN(12, 0x3) -#define MPP12_SDIO1_CD MPP_PIN(12, 0x4) +#define MPP18_GPIO18 MPP(18, 0x0, 1, 1) +#define MPP18_UA3_TXD MPP(18, 0x2, 0, 0) +#define MPP18_SDIO0_BUS_POWER MPP(18, 0x3, 0, 0) +#define MPP18_LCD0_PWM MPP(18, 0x4, 0, 0) +#define MPP18_AC_SDATA_IN3 MPP(18, 0x5, 0, 0) -#define MPP13_GPIO13 MPP_PIN(13, 0x0) -#define MPP13_UA2_CTSn MPP_PIN(13, 0x2) -#define MPP13_AD1_I2S_EXT_MCLK MPP_PIN(13, 0x3) -#define MPP13_SDIO1WP MPP_PIN(13, 0x4) -#define MPP13_SSP_EXTCLK MPP_PIN(13, 0x5) +#define MPP19_GPIO19 MPP(19, 0x0, 1, 1) +#define MPP19_UA3_RXD MPP(19, 0x2, 0, 0) +#define MPP19_SDIO0_LED_CTRL MPP(19, 0x3, 0, 0) +#define MPP19_TW_SCK2 MPP(19, 0x4, 0, 0) -#define MPP14_GPIO14 MPP_PIN(14, 0x0) -#define MPP14_UA2_TXD MPP_PIN(14, 0x2) -#define MPP14_SDIO1_BUS_POWER MPP_PIN(14, 0x4) -#define MPP14_SSP_RXD MPP_PIN(14, 0x5) +#define MPP20_GPIO20 MPP(20, 0x0, 1, 1) +#define MPP20_AC97_SYSCLK_OUT MPP(20, 0x1, 0, 0) +#define MPP20_SPI_LCD_MISO MPP(20, 0x2, 0, 0) +#define MPP20_SDIO1_CD MPP(20, 0x3, 0, 0) +#define MPP20_SDIO0_CD MPP(20, 0x5, 0, 0) +#define MPP20_SPI_1_MISO MPP(20, 0x6, 0, 0) -#define MPP15_GPIO15 MPP_PIN(15, 0x0) -#define MPP15_UA2_RXD MPP_PIN(15, 0x2) -#define MPP15_SDIO1_LED_CTRL MPP_PIN(15, 0x4) -#define MPP15_SSP_SFRM MPP_PIN(15, 0x5) +#define MPP21_GPIO21 MPP(21, 0x0, 1, 1) +#define MPP21_UA1_RTSn MPP(21, 0x1, 0, 0) +#define MPP21_SPI_LCD_CS0 MPP(21, 0x2, 0, 0) +#define MPP21_SDIO1_WP MPP(21, 0x3, 0, 0) +#define MPP21_SSP_SFRM MPP(21, 0x4, 0, 0) +#define MPP21_SDIO0_WP MPP(21, 0x5, 0, 0) +#define MPP21_SPI_1_CS MPP(21, 0x6, 0, 0) -#define MPP16_GPIO16 MPP_PIN(16, 0x0) -#define MPP16_UA3_RTSn MPP_PIN(16, 0x2) -#define MPP16_SDIO0_CD MPP_PIN(16, 0x3) -#define MPP16_SPI_LCD_CS1 MPP_PIN(16, 0x4) -#define MPP16_AC97_SDATA_IN1 MPP_PIN(16, 0x5) +#define MPP22_GPIO22 MPP(22, 0x0, 1, 1) +#define MPP22_UA1_CTSn MPP(22, 0x1, 0, 0) +#define MPP22_SPI_LCD_MOSI MPP(22, 0x2, 0, 0) +#define MPP22_SDIO1_BUS_POWER MPP(22, 0x3, 0, 0) +#define MPP22_SSP_TXD MPP(22, 0x4, 0, 0) +#define MPP22_SDIO0_BUS_POWER MPP(22, 0x5, 0, 0) +#define MPP22_SPI_1_MOSI MPP(22, 0x6, 0, 0) -#define MPP17_GPIO17 MPP_PIN(17, 0x0) -#define MPP17_AC97_SYSCLK_OUT MPP_PIN(17, 0x1) -#define MPP17_UA3_CTSn MPP_PIN(17, 0x2) -#define MPP17_SDIO0_WP MPP_PIN(17, 0x3) -#define MPP17_TW_SDA2 MPP_PIN(17, 0x4) -#define MPP17_AC97_SDATA_IN2 MPP_PIN(17, 0x5) +#define MPP23_GPIO23 MPP(23, 0x0, 1, 1) +#define MPP23_SPI_LCD_SCK MPP(23, 0x2, 0, 0) +#define MPP23_SDIO1_LED_CTRL MPP(23, 0x3, 0, 0) +#define MPP23_SSP_SCLK MPP(23, 0x4, 0, 0) +#define MPP23_SDIO0_LED_CTRL MPP(23, 0x5, 0, 0) +#define MPP23_SPI_1_SCK MPP(23, 0x6, 0, 0) -#define MPP18_GPIO18 MPP_PIN(18, 0x0) -#define MPP18_UA3_TXD MPP_PIN(18, 0x2) -#define MPP18_SDIO0_BUS_POWER MPP_PIN(18, 0x3) -#define MPP18_LCD0_PWM MPP_PIN(18, 0x4) -#define MPP18_AC_SDATA_IN3 MPP_PIN(18, 0x5) +#define MPP_MAX 23 -#define MPP19_GPIO19 MPP_PIN(19, 0x0) -#define MPP19_UA3_RXD MPP_PIN(19, 0x2) -#define MPP19_SDIO0_LED_CTRL MPP_PIN(19, 0x3) -#define MPP19_TW_SCK2 MPP_PIN(19, 0x4) - -#define MPP20_GPIO20 MPP_PIN(20, 0x0) -#define MPP20_AC97_SYSCLK_OUT MPP_PIN(20, 0x1) -#define MPP20_SPI_LCD_MISO MPP_PIN(20, 0x2) -#define MPP20_SDIO1_CD MPP_PIN(20, 0x3) -#define MPP20_SDIO0_CD MPP_PIN(20, 0x5) -#define MPP20_SPI_1_MISO MPP_PIN(20, 0x6) - -#define MPP21_GPIO21 MPP_PIN(21, 0x0) -#define MPP21_UA1_RTSn MPP_PIN(21, 0x1) -#define MPP21_SPI_LCD_CS0 MPP_PIN(21, 0x2) -#define MPP21_SDIO1_WP MPP_PIN(21, 0x3) -#define MPP21_SSP_SFRM MPP_PIN(21, 0x4) -#define MPP21_SDIO0_WP MPP_PIN(21, 0x5) -#define MPP21_SPI_1_CS MPP_PIN(21, 0x6) - -#define MPP22_GPIO22 MPP_PIN(22, 0x0) -#define MPP22_UA1_CTSn MPP_PIN(22, 0x1) -#define MPP22_SPI_LCD_MOSI MPP_PIN(22, 0x2) -#define MPP22_SDIO1_BUS_POWER MPP_PIN(22, 0x3) -#define MPP22_SSP_TXD MPP_PIN(22, 0x4) -#define MPP22_SDIO0_BUS_POWER MPP_PIN(22, 0x5) -#define MPP22_SPI_1_MOSI MPP_PIN(22, 0x6) - -#define MPP23_GPIO23 MPP_PIN(23, 0x0) -#define MPP23_SPI_LCD_SCK MPP_PIN(23, 0x2) -#define MPP23_SDIO1_LED_CTRL MPP_PIN(23, 0x3) -#define MPP23_SSP_SCLK MPP_PIN(23, 0x4) -#define MPP23_SDIO0_LED_CTRL MPP_PIN(23, 0x5) -#define MPP23_SPI_1_SCK MPP_PIN(23, 0x6) +#define MPP_GRP(_grp, _mode) MPP((_grp), (_mode), 0, 0) /* for MPP groups _num is a group index */ enum dove_mpp_grp_idx { @@ -181,40 +153,44 @@ enum dove_mpp_grp_idx { MPP_46_51 = 1, MPP_58_61 = 5, MPP_62_63 = 4, + MPP_GRP_MAX = 5, }; -#define MPP24_39_GPIO MPP_GRP(MPP_24_39, 0x1) -#define MPP24_39_CAM MPP_GRP(MPP_24_39, 0x0) +#define MPP_GRP_24_39_GPIO MPP_GRP(MPP_24_39, 0x1) +#define MPP_GRP_24_39_CAM MPP_GRP(MPP_24_39, 0x0) -#define MPP40_45_GPIO MPP_GRP(MPP_40_45, 0x1) -#define MPP40_45_SD0 MPP_GRP(MPP_40_45, 0x0) +#define MPP_GRP_40_45_GPIO MPP_GRP(MPP_40_45, 0x1) +#define MPP_GRP_40_45_SD0 MPP_GRP(MPP_40_45, 0x0) -#define MPP46_51_GPIO MPP_GRP(MPP_46_51, 0x1) -#define MPP46_51_SD1 MPP_GRP(MPP_46_51, 0x0) +#define MPP_GRP_46_51_GPIO MPP_GRP(MPP_46_51, 0x1) +#define MPP_GRP_46_51_SD1 MPP_GRP(MPP_46_51, 0x0) -#define MPP58_61_GPIO MPP_GRP(MPP_58_61, 0x1) -#define MPP58_61_SPI MPP_GRP(MPP_58_61, 0x0) +#define MPP_GRP_58_61_GPIO MPP_GRP(MPP_58_61, 0x1) +#define MPP_GRP_58_61_SPI MPP_GRP(MPP_58_61, 0x0) -#define MPP62_63_GPIO MPP_GRP(MPP_62_63, 0x1) -#define MPP62_63_UA1 MPP_GRP(MPP_62_63, 0x0) +#define MPP_GRP_62_63_GPIO MPP_GRP(MPP_62_63, 0x1) +#define MPP_GRP_62_63_UA1 MPP_GRP(MPP_62_63, 0x0) /* The MPP[64:71] control differs from other groups */ -#define MPP64_71_GPO MPP_GRP_NFC(0x1) -#define MPP64_71_NFC MPP_GRP_NFC(0x0) +#define MPP_GRP_NFC_64_71_GPO 0x1 +#define MPP_GRP_NFC_64_71_NFC 0x0 /* * The MPP[52:57] functionality is encoded by 4 bits in different * registers. The _num field in this case encodes those bits in * correspodence with Table 135 of 88AP510 Functional specification */ -#define MPP52_57_AU1 MPP_GRP_AU1(0x0) -#define MPP52_57_AU1_GPIO57 MPP_GRP_AU1(0x2) -#define MPP52_57_GPIO MPP_GRP_AU1(0xa) -#define MPP52_57_TW_GPIO MPP_GRP_AU1(0xb) -#define MPP52_57_AU1_SSP MPP_GRP_AU1(0xc) -#define MPP52_57_SSP_GPIO MPP_GRP_AU1(0xe) -#define MPP52_57_SSP_TW MPP_GRP_AU1(0xf) +#define MPP_GRP_AU1_52_57_AU1 0x0 +#define MPP_GRP_AU1_52_57_AU1_GPIO57 0x2 +#define MPP_GRP_AU1_52_57_GPIO 0xa +#define MPP_GRP_AU1_52_57_TW_GPIO 0xb +#define MPP_GRP_AU1_52_57_AU1_SSP 0xc +#define MPP_GRP_AU1_52_57_SSP_GPIO 0xe +#define MPP_GRP_AU1_52_57_SSP_TW 0xf -void dove_mpp_conf(unsigned int *mpp_list); +void dove_mpp_conf(unsigned int *mpp_list, + unsigned int *mpp_grp_list, + unsigned int grp_au1_52_57, + unsigned int grp_nfc_64_71); #endif /* __ARCH_DOVE_MPP_CODED_H */ diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c index 82079545adc4..1d4b65fd673e 100644 --- a/arch/arm/mach-ep93xx/core.c +++ b/arch/arm/mach-ep93xx/core.c @@ -402,11 +402,15 @@ static struct resource ep93xx_eth_resource[] = { } }; +static u64 ep93xx_eth_dma_mask = DMA_BIT_MASK(32); + static struct platform_device ep93xx_eth_device = { .name = "ep93xx-eth", .id = -1, .dev = { - .platform_data = &ep93xx_eth_data, + .platform_data = &ep93xx_eth_data, + .coherent_dma_mask = DMA_BIT_MASK(32), + .dma_mask = &ep93xx_eth_dma_mask, }, .num_resources = ARRAY_SIZE(ep93xx_eth_resource), .resource = ep93xx_eth_resource, diff --git a/arch/arm/mach-ep93xx/gpio.c b/arch/arm/mach-ep93xx/gpio.c index a5a9ff70b198..415dce37b88c 100644 --- a/arch/arm/mach-ep93xx/gpio.c +++ b/arch/arm/mach-ep93xx/gpio.c @@ -356,29 +356,6 @@ static int ep93xx_gpio_set_debounce(struct gpio_chip *chip, return 0; } -static void ep93xx_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip) -{ - struct ep93xx_gpio_chip *ep93xx_chip = to_ep93xx_gpio_chip(chip); - u8 data_reg, data_dir_reg; - int gpio, i; - - data_reg = __raw_readb(ep93xx_chip->data_reg); - data_dir_reg = __raw_readb(ep93xx_chip->data_dir_reg); - - gpio = ep93xx_chip->chip.base; - for (i = 0; i < chip->ngpio; i++, gpio++) { - int is_out = data_dir_reg & (1 << i); - int irq = gpio_to_irq(gpio); - - seq_printf(s, " %s%d gpio-%-3d (%-12s) %s %s %s\n", - chip->label, i, gpio, - gpiochip_is_requested(chip, i) ? : "", - is_out ? "out" : "in ", - (data_reg & (1<< i)) ? "hi" : "lo", - (!is_out && irq>= 0) ? "(interrupt)" : ""); - } -} - #define EP93XX_GPIO_BANK(name, dr, ddr, base_gpio) \ { \ .chip = { \ @@ -387,7 +364,6 @@ static void ep93xx_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip) .direction_output = ep93xx_gpio_direction_output, \ .get = ep93xx_gpio_get, \ .set = ep93xx_gpio_set, \ - .dbg_show = ep93xx_gpio_dbg_show, \ .base = base_gpio, \ .ngpio = 8, \ }, \ diff --git a/arch/arm/mach-exynos4/Kconfig b/arch/arm/mach-exynos4/Kconfig index e849f67be47d..1435fc31c4b2 100644 --- a/arch/arm/mach-exynos4/Kconfig +++ b/arch/arm/mach-exynos4/Kconfig @@ -91,6 +91,11 @@ config EXYNOS4_SETUP_FIMC help Common setup code for the camera interfaces. +config EXYNOS4_SETUP_USB_PHY + bool + help + Common setup code for USB PHY controller + # machine support menu "EXYNOS4 Machines" @@ -169,10 +174,14 @@ config MACH_NURI select S3C_DEV_HSMMC2 select S3C_DEV_HSMMC3 select S3C_DEV_I2C1 + select S3C_DEV_I2C3 select S3C_DEV_I2C5 + select S5P_DEV_USB_EHCI select EXYNOS4_SETUP_I2C1 + select EXYNOS4_SETUP_I2C3 select EXYNOS4_SETUP_I2C5 select EXYNOS4_SETUP_SDHCI + select EXYNOS4_SETUP_USB_PHY select SAMSUNG_DEV_PWM help Machine support for Samsung Mobile NURI Board. diff --git a/arch/arm/mach-exynos4/Makefile b/arch/arm/mach-exynos4/Makefile index 9be104f63c0b..60fe5ecf3599 100644 --- a/arch/arm/mach-exynos4/Makefile +++ b/arch/arm/mach-exynos4/Makefile @@ -13,9 +13,10 @@ obj- := # Core support for EXYNOS4 system obj-$(CONFIG_CPU_EXYNOS4210) += cpu.o init.o clock.o irq-combiner.o -obj-$(CONFIG_CPU_EXYNOS4210) += setup-i2c0.o gpiolib.o irq-eint.o dma.o +obj-$(CONFIG_CPU_EXYNOS4210) += setup-i2c0.o irq-eint.o dma.o obj-$(CONFIG_PM) += pm.o sleep.o obj-$(CONFIG_CPU_FREQ) += cpufreq.o +obj-$(CONFIG_CPU_IDLE) += cpuidle.o obj-$(CONFIG_SMP) += platsmp.o headsmp.o @@ -54,3 +55,5 @@ obj-$(CONFIG_EXYNOS4_SETUP_I2C7) += setup-i2c7.o obj-$(CONFIG_EXYNOS4_SETUP_KEYPAD) += setup-keypad.o obj-$(CONFIG_EXYNOS4_SETUP_SDHCI) += setup-sdhci.o obj-$(CONFIG_EXYNOS4_SETUP_SDHCI_GPIO) += setup-sdhci-gpio.o + +obj-$(CONFIG_EXYNOS4_SETUP_USB_PHY) += setup-usb-phy.o diff --git a/arch/arm/mach-exynos4/cpu.c b/arch/arm/mach-exynos4/cpu.c index 793011391943..9babe4473e88 100644 --- a/arch/arm/mach-exynos4/cpu.c +++ b/arch/arm/mach-exynos4/cpu.c @@ -97,7 +97,12 @@ static struct map_desc exynos4_iodesc[] __initdata = { .pfn = __phys_to_pfn(EXYNOS4_PA_SROMC), .length = SZ_4K, .type = MT_DEVICE, - }, + }, { + .virtual = (unsigned long)S3C_VA_USB_HSPHY, + .pfn = __phys_to_pfn(EXYNOS4_PA_HSPHY), + .length = SZ_4K, + .type = MT_DEVICE, + } }; static void exynos4_idle(void) diff --git a/arch/arm/mach-exynos4/cpuidle.c b/arch/arm/mach-exynos4/cpuidle.c new file mode 100644 index 000000000000..bf7e96f2793a --- /dev/null +++ b/arch/arm/mach-exynos4/cpuidle.c @@ -0,0 +1,86 @@ +/* linux/arch/arm/mach-exynos4/cpuidle.c + * + * Copyright (c) 2011 Samsung Electronics Co., Ltd. + * http://www.samsung.com + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. +*/ + +#include +#include +#include +#include + +#include + +static int exynos4_enter_idle(struct cpuidle_device *dev, + struct cpuidle_state *state); + +static struct cpuidle_state exynos4_cpuidle_set[] = { + [0] = { + .enter = exynos4_enter_idle, + .exit_latency = 1, + .target_residency = 100000, + .flags = CPUIDLE_FLAG_TIME_VALID, + .name = "IDLE", + .desc = "ARM clock gating(WFI)", + }, +}; + +static DEFINE_PER_CPU(struct cpuidle_device, exynos4_cpuidle_device); + +static struct cpuidle_driver exynos4_idle_driver = { + .name = "exynos4_idle", + .owner = THIS_MODULE, +}; + +static int exynos4_enter_idle(struct cpuidle_device *dev, + struct cpuidle_state *state) +{ + struct timeval before, after; + int idle_time; + + local_irq_disable(); + do_gettimeofday(&before); + + cpu_do_idle(); + + do_gettimeofday(&after); + local_irq_enable(); + idle_time = (after.tv_sec - before.tv_sec) * USEC_PER_SEC + + (after.tv_usec - before.tv_usec); + + return idle_time; +} + +static int __init exynos4_init_cpuidle(void) +{ + int i, max_cpuidle_state, cpu_id; + struct cpuidle_device *device; + + cpuidle_register_driver(&exynos4_idle_driver); + + for_each_cpu(cpu_id, cpu_online_mask) { + device = &per_cpu(exynos4_cpuidle_device, cpu_id); + device->cpu = cpu_id; + + device->state_count = (sizeof(exynos4_cpuidle_set) / + sizeof(struct cpuidle_state)); + + max_cpuidle_state = device->state_count; + + for (i = 0; i < max_cpuidle_state; i++) { + memcpy(&device->states[i], &exynos4_cpuidle_set[i], + sizeof(struct cpuidle_state)); + } + + if (cpuidle_register_device(device)) { + printk(KERN_ERR "CPUidle register device failed\n,"); + return -EIO; + } + } + return 0; +} +device_initcall(exynos4_init_cpuidle); diff --git a/arch/arm/mach-exynos4/gpiolib.c b/arch/arm/mach-exynos4/gpiolib.c deleted file mode 100644 index d54ca6adb660..000000000000 --- a/arch/arm/mach-exynos4/gpiolib.c +++ /dev/null @@ -1,365 +0,0 @@ -/* linux/arch/arm/mach-exynos4/gpiolib.c - * - * Copyright (c) 2010-2011 Samsung Electronics Co., Ltd. - * http://www.samsung.com - * - * EXYNOS4 - GPIOlib support - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. -*/ - -#include -#include -#include -#include - -#include - -#include -#include -#include - -static struct s3c_gpio_cfg gpio_cfg = { - .set_config = s3c_gpio_setcfg_s3c64xx_4bit, - .set_pull = s3c_gpio_setpull_updown, - .get_pull = s3c_gpio_getpull_updown, -}; - -static struct s3c_gpio_cfg gpio_cfg_noint = { - .set_config = s3c_gpio_setcfg_s3c64xx_4bit, - .set_pull = s3c_gpio_setpull_updown, - .get_pull = s3c_gpio_getpull_updown, -}; - -/* - * Following are the gpio banks in v310. - * - * The 'config' member when left to NULL, is initialized to the default - * structure gpio_cfg in the init function below. - * - * The 'base' member is also initialized in the init function below. - * Note: The initialization of 'base' member of s3c_gpio_chip structure - * uses the above macro and depends on the banks being listed in order here. - */ -static struct s3c_gpio_chip exynos4_gpio_part1_4bit[] = { - { - .chip = { - .base = EXYNOS4_GPA0(0), - .ngpio = EXYNOS4_GPIO_A0_NR, - .label = "GPA0", - }, - }, { - .chip = { - .base = EXYNOS4_GPA1(0), - .ngpio = EXYNOS4_GPIO_A1_NR, - .label = "GPA1", - }, - }, { - .chip = { - .base = EXYNOS4_GPB(0), - .ngpio = EXYNOS4_GPIO_B_NR, - .label = "GPB", - }, - }, { - .chip = { - .base = EXYNOS4_GPC0(0), - .ngpio = EXYNOS4_GPIO_C0_NR, - .label = "GPC0", - }, - }, { - .chip = { - .base = EXYNOS4_GPC1(0), - .ngpio = EXYNOS4_GPIO_C1_NR, - .label = "GPC1", - }, - }, { - .chip = { - .base = EXYNOS4_GPD0(0), - .ngpio = EXYNOS4_GPIO_D0_NR, - .label = "GPD0", - }, - }, { - .chip = { - .base = EXYNOS4_GPD1(0), - .ngpio = EXYNOS4_GPIO_D1_NR, - .label = "GPD1", - }, - }, { - .chip = { - .base = EXYNOS4_GPE0(0), - .ngpio = EXYNOS4_GPIO_E0_NR, - .label = "GPE0", - }, - }, { - .chip = { - .base = EXYNOS4_GPE1(0), - .ngpio = EXYNOS4_GPIO_E1_NR, - .label = "GPE1", - }, - }, { - .chip = { - .base = EXYNOS4_GPE2(0), - .ngpio = EXYNOS4_GPIO_E2_NR, - .label = "GPE2", - }, - }, { - .chip = { - .base = EXYNOS4_GPE3(0), - .ngpio = EXYNOS4_GPIO_E3_NR, - .label = "GPE3", - }, - }, { - .chip = { - .base = EXYNOS4_GPE4(0), - .ngpio = EXYNOS4_GPIO_E4_NR, - .label = "GPE4", - }, - }, { - .chip = { - .base = EXYNOS4_GPF0(0), - .ngpio = EXYNOS4_GPIO_F0_NR, - .label = "GPF0", - }, - }, { - .chip = { - .base = EXYNOS4_GPF1(0), - .ngpio = EXYNOS4_GPIO_F1_NR, - .label = "GPF1", - }, - }, { - .chip = { - .base = EXYNOS4_GPF2(0), - .ngpio = EXYNOS4_GPIO_F2_NR, - .label = "GPF2", - }, - }, { - .chip = { - .base = EXYNOS4_GPF3(0), - .ngpio = EXYNOS4_GPIO_F3_NR, - .label = "GPF3", - }, - }, -}; - -static struct s3c_gpio_chip exynos4_gpio_part2_4bit[] = { - { - .chip = { - .base = EXYNOS4_GPJ0(0), - .ngpio = EXYNOS4_GPIO_J0_NR, - .label = "GPJ0", - }, - }, { - .chip = { - .base = EXYNOS4_GPJ1(0), - .ngpio = EXYNOS4_GPIO_J1_NR, - .label = "GPJ1", - }, - }, { - .chip = { - .base = EXYNOS4_GPK0(0), - .ngpio = EXYNOS4_GPIO_K0_NR, - .label = "GPK0", - }, - }, { - .chip = { - .base = EXYNOS4_GPK1(0), - .ngpio = EXYNOS4_GPIO_K1_NR, - .label = "GPK1", - }, - }, { - .chip = { - .base = EXYNOS4_GPK2(0), - .ngpio = EXYNOS4_GPIO_K2_NR, - .label = "GPK2", - }, - }, { - .chip = { - .base = EXYNOS4_GPK3(0), - .ngpio = EXYNOS4_GPIO_K3_NR, - .label = "GPK3", - }, - }, { - .chip = { - .base = EXYNOS4_GPL0(0), - .ngpio = EXYNOS4_GPIO_L0_NR, - .label = "GPL0", - }, - }, { - .chip = { - .base = EXYNOS4_GPL1(0), - .ngpio = EXYNOS4_GPIO_L1_NR, - .label = "GPL1", - }, - }, { - .chip = { - .base = EXYNOS4_GPL2(0), - .ngpio = EXYNOS4_GPIO_L2_NR, - .label = "GPL2", - }, - }, { - .config = &gpio_cfg_noint, - .chip = { - .base = EXYNOS4_GPY0(0), - .ngpio = EXYNOS4_GPIO_Y0_NR, - .label = "GPY0", - }, - }, { - .config = &gpio_cfg_noint, - .chip = { - .base = EXYNOS4_GPY1(0), - .ngpio = EXYNOS4_GPIO_Y1_NR, - .label = "GPY1", - }, - }, { - .config = &gpio_cfg_noint, - .chip = { - .base = EXYNOS4_GPY2(0), - .ngpio = EXYNOS4_GPIO_Y2_NR, - .label = "GPY2", - }, - }, { - .config = &gpio_cfg_noint, - .chip = { - .base = EXYNOS4_GPY3(0), - .ngpio = EXYNOS4_GPIO_Y3_NR, - .label = "GPY3", - }, - }, { - .config = &gpio_cfg_noint, - .chip = { - .base = EXYNOS4_GPY4(0), - .ngpio = EXYNOS4_GPIO_Y4_NR, - .label = "GPY4", - }, - }, { - .config = &gpio_cfg_noint, - .chip = { - .base = EXYNOS4_GPY5(0), - .ngpio = EXYNOS4_GPIO_Y5_NR, - .label = "GPY5", - }, - }, { - .config = &gpio_cfg_noint, - .chip = { - .base = EXYNOS4_GPY6(0), - .ngpio = EXYNOS4_GPIO_Y6_NR, - .label = "GPY6", - }, - }, { - .base = (S5P_VA_GPIO2 + 0xC00), - .config = &gpio_cfg_noint, - .irq_base = IRQ_EINT(0), - .chip = { - .base = EXYNOS4_GPX0(0), - .ngpio = EXYNOS4_GPIO_X0_NR, - .label = "GPX0", - .to_irq = samsung_gpiolib_to_irq, - }, - }, { - .base = (S5P_VA_GPIO2 + 0xC20), - .config = &gpio_cfg_noint, - .irq_base = IRQ_EINT(8), - .chip = { - .base = EXYNOS4_GPX1(0), - .ngpio = EXYNOS4_GPIO_X1_NR, - .label = "GPX1", - .to_irq = samsung_gpiolib_to_irq, - }, - }, { - .base = (S5P_VA_GPIO2 + 0xC40), - .config = &gpio_cfg_noint, - .irq_base = IRQ_EINT(16), - .chip = { - .base = EXYNOS4_GPX2(0), - .ngpio = EXYNOS4_GPIO_X2_NR, - .label = "GPX2", - .to_irq = samsung_gpiolib_to_irq, - }, - }, { - .base = (S5P_VA_GPIO2 + 0xC60), - .config = &gpio_cfg_noint, - .irq_base = IRQ_EINT(24), - .chip = { - .base = EXYNOS4_GPX3(0), - .ngpio = EXYNOS4_GPIO_X3_NR, - .label = "GPX3", - .to_irq = samsung_gpiolib_to_irq, - }, - }, -}; - -static struct s3c_gpio_chip exynos4_gpio_part3_4bit[] = { - { - .chip = { - .base = EXYNOS4_GPZ(0), - .ngpio = EXYNOS4_GPIO_Z_NR, - .label = "GPZ", - }, - }, -}; - -static __init int exynos4_gpiolib_init(void) -{ - struct s3c_gpio_chip *chip; - int i; - int group = 0; - int nr_chips; - - /* GPIO part 1 */ - - chip = exynos4_gpio_part1_4bit; - nr_chips = ARRAY_SIZE(exynos4_gpio_part1_4bit); - - for (i = 0; i < nr_chips; i++, chip++) { - if (chip->config == NULL) { - chip->config = &gpio_cfg; - /* Assign the GPIO interrupt group */ - chip->group = group++; - } - if (chip->base == NULL) - chip->base = S5P_VA_GPIO1 + (i) * 0x20; - } - - samsung_gpiolib_add_4bit_chips(exynos4_gpio_part1_4bit, nr_chips); - - /* GPIO part 2 */ - - chip = exynos4_gpio_part2_4bit; - nr_chips = ARRAY_SIZE(exynos4_gpio_part2_4bit); - - for (i = 0; i < nr_chips; i++, chip++) { - if (chip->config == NULL) { - chip->config = &gpio_cfg; - /* Assign the GPIO interrupt group */ - chip->group = group++; - } - if (chip->base == NULL) - chip->base = S5P_VA_GPIO2 + (i) * 0x20; - } - - samsung_gpiolib_add_4bit_chips(exynos4_gpio_part2_4bit, nr_chips); - - /* GPIO part 3 */ - - chip = exynos4_gpio_part3_4bit; - nr_chips = ARRAY_SIZE(exynos4_gpio_part3_4bit); - - for (i = 0; i < nr_chips; i++, chip++) { - if (chip->config == NULL) { - chip->config = &gpio_cfg; - /* Assign the GPIO interrupt group */ - chip->group = group++; - } - if (chip->base == NULL) - chip->base = S5P_VA_GPIO3 + (i) * 0x20; - } - - samsung_gpiolib_add_4bit_chips(exynos4_gpio_part3_4bit, nr_chips); - s5p_register_gpioint_bank(IRQ_GPIO_XA, 0, IRQ_GPIO1_NR_GROUPS); - s5p_register_gpioint_bank(IRQ_GPIO_XB, IRQ_GPIO1_NR_GROUPS, IRQ_GPIO2_NR_GROUPS); - - return 0; -} -core_initcall(exynos4_gpiolib_init); diff --git a/arch/arm/mach-exynos4/include/mach/map.h b/arch/arm/mach-exynos4/include/mach/map.h index 6330b73b9ea7..0009e77a05fc 100644 --- a/arch/arm/mach-exynos4/include/mach/map.h +++ b/arch/arm/mach-exynos4/include/mach/map.h @@ -101,6 +101,9 @@ #define EXYNOS4_PA_SROMC 0x12570000 +#define EXYNOS4_PA_EHCI 0x12580000 +#define EXYNOS4_PA_HSPHY 0x125B0000 + #define EXYNOS4_PA_UART 0x13800000 #define EXYNOS4_PA_IIC(x) (0x13860000 + ((x) * 0x10000)) @@ -143,6 +146,7 @@ #define S5P_PA_SROMC EXYNOS4_PA_SROMC #define S5P_PA_SYSCON EXYNOS4_PA_SYSCON #define S5P_PA_TIMER EXYNOS4_PA_TIMER +#define S5P_PA_EHCI EXYNOS4_PA_EHCI #define SAMSUNG_PA_KEYPAD EXYNOS4_PA_KEYPAD diff --git a/arch/arm/mach-exynos4/include/mach/regs-pmu.h b/arch/arm/mach-exynos4/include/mach/regs-pmu.h index 62b0014d05e0..a9643371f8e7 100644 --- a/arch/arm/mach-exynos4/include/mach/regs-pmu.h +++ b/arch/arm/mach-exynos4/include/mach/regs-pmu.h @@ -33,6 +33,9 @@ #define S5P_EINT_WAKEUP_MASK S5P_PMUREG(0x0604) #define S5P_WAKEUP_MASK S5P_PMUREG(0x0608) +#define S5P_USBHOST_PHY_CONTROL S5P_PMUREG(0x0708) +#define S5P_USBHOST_PHY_ENABLE (1 << 0) + #define S5P_MIPI_DPHY_CONTROL(n) S5P_PMUREG(0x0710 + (n) * 4) #define S5P_MIPI_DPHY_ENABLE (1 << 0) #define S5P_MIPI_DPHY_SRESETN (1 << 1) diff --git a/arch/arm/mach-exynos4/include/mach/regs-usb-phy.h b/arch/arm/mach-exynos4/include/mach/regs-usb-phy.h new file mode 100644 index 000000000000..c337cf3a71bf --- /dev/null +++ b/arch/arm/mach-exynos4/include/mach/regs-usb-phy.h @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2011 Samsung Electronics Co.Ltd + * Author: Joonyoung Shim + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#ifndef __PLAT_S5P_REGS_USB_PHY_H +#define __PLAT_S5P_REGS_USB_PHY_H + +#define EXYNOS4_HSOTG_PHYREG(x) ((x) + S3C_VA_USB_HSPHY) + +#define EXYNOS4_PHYPWR EXYNOS4_HSOTG_PHYREG(0x00) +#define PHY1_HSIC_NORMAL_MASK (0xf << 9) +#define PHY1_HSIC1_SLEEP (1 << 12) +#define PHY1_HSIC1_FORCE_SUSPEND (1 << 11) +#define PHY1_HSIC0_SLEEP (1 << 10) +#define PHY1_HSIC0_FORCE_SUSPEND (1 << 9) + +#define PHY1_STD_NORMAL_MASK (0x7 << 6) +#define PHY1_STD_SLEEP (1 << 8) +#define PHY1_STD_ANALOG_POWERDOWN (1 << 7) +#define PHY1_STD_FORCE_SUSPEND (1 << 6) + +#define PHY0_NORMAL_MASK (0x39 << 0) +#define PHY0_SLEEP (1 << 5) +#define PHY0_OTG_DISABLE (1 << 4) +#define PHY0_ANALOG_POWERDOWN (1 << 3) +#define PHY0_FORCE_SUSPEND (1 << 0) + +#define EXYNOS4_PHYCLK EXYNOS4_HSOTG_PHYREG(0x04) +#define PHY1_COMMON_ON_N (1 << 7) +#define PHY0_COMMON_ON_N (1 << 4) +#define PHY0_ID_PULLUP (1 << 2) +#define CLKSEL_MASK (0x3 << 0) +#define CLKSEL_SHIFT (0) +#define CLKSEL_48M (0x0 << 0) +#define CLKSEL_12M (0x2 << 0) +#define CLKSEL_24M (0x3 << 0) + +#define EXYNOS4_RSTCON EXYNOS4_HSOTG_PHYREG(0x08) +#define HOST_LINK_PORT_SWRST_MASK (0xf << 6) +#define HOST_LINK_PORT2_SWRST (1 << 9) +#define HOST_LINK_PORT1_SWRST (1 << 8) +#define HOST_LINK_PORT0_SWRST (1 << 7) +#define HOST_LINK_ALL_SWRST (1 << 6) + +#define PHY1_SWRST_MASK (0x7 << 3) +#define PHY1_HSIC_SWRST (1 << 5) +#define PHY1_STD_SWRST (1 << 4) +#define PHY1_ALL_SWRST (1 << 3) + +#define PHY0_SWRST_MASK (0x7 << 0) +#define PHY0_PHYLINK_SWRST (1 << 2) +#define PHY0_HLINK_SWRST (1 << 1) +#define PHY0_SWRST (1 << 0) + +#define EXYNOS4_PHY1CON EXYNOS4_HSOTG_PHYREG(0x34) +#define FPENABLEN (1 << 0) + +#endif /* __PLAT_S5P_REGS_USB_PHY_H */ diff --git a/arch/arm/mach-exynos4/include/mach/smp.h b/arch/arm/mach-exynos4/include/mach/smp.h deleted file mode 100644 index a463dcebcfd3..000000000000 --- a/arch/arm/mach-exynos4/include/mach/smp.h +++ /dev/null @@ -1,19 +0,0 @@ -/* linux/arch/arm/mach-exynos4/include/mach/smp.h - * - * Cloned from arch/arm/mach-realview/include/mach/smp.h -*/ - -#ifndef ASM_ARCH_SMP_H -#define ASM_ARCH_SMP_H __FILE__ - -#include - -/* - * We use IRQ1 as the IPI - */ -static inline void smp_cross_call(const struct cpumask *mask, int ipi) -{ - gic_raise_softirq(mask, ipi); -} - -#endif diff --git a/arch/arm/mach-exynos4/irq-combiner.c b/arch/arm/mach-exynos4/irq-combiner.c index f488b66d6806..5a2758ab055e 100644 --- a/arch/arm/mach-exynos4/irq-combiner.c +++ b/arch/arm/mach-exynos4/irq-combiner.c @@ -59,8 +59,7 @@ static void combiner_handle_cascade_irq(unsigned int irq, struct irq_desc *desc) unsigned int cascade_irq, combiner_irq; unsigned long status; - /* primary controller ack'ing */ - chip->irq_ack(&desc->irq_data); + chained_irq_enter(chip, desc); spin_lock(&irq_controller_lock); status = __raw_readl(chip_data->base + COMBINER_INT_STATUS); @@ -79,8 +78,7 @@ static void combiner_handle_cascade_irq(unsigned int irq, struct irq_desc *desc) generic_handle_irq(cascade_irq); out: - /* primary controller unmasking */ - chip->irq_unmask(&desc->irq_data); + chained_irq_exit(chip, desc); } static struct irq_chip combiner_chip = { diff --git a/arch/arm/mach-exynos4/mach-nuri.c b/arch/arm/mach-exynos4/mach-nuri.c index b79ad010d194..642702bb5b12 100644 --- a/arch/arm/mach-exynos4/mach-nuri.c +++ b/arch/arm/mach-exynos4/mach-nuri.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -30,6 +31,10 @@ #include #include #include +#include +#include +#include +#include #include @@ -257,11 +262,103 @@ static struct i2c_board_info i2c1_devs[] __initdata = { /* Gyro, To be updated */ }; +/* TSP */ +static u8 mxt_init_vals[] = { + /* MXT_GEN_COMMAND(6) */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* MXT_GEN_POWER(7) */ + 0x20, 0xff, 0x32, + /* MXT_GEN_ACQUIRE(8) */ + 0x0a, 0x00, 0x05, 0x00, 0x00, 0x00, 0x09, 0x23, + /* MXT_TOUCH_MULTI(9) */ + 0x00, 0x00, 0x00, 0x13, 0x0b, 0x00, 0x00, 0x00, 0x02, 0x00, + 0x00, 0x01, 0x01, 0x0e, 0x0a, 0x0a, 0x0a, 0x0a, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, + /* MXT_TOUCH_KEYARRAY(15) */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, + 0x00, + /* MXT_SPT_GPIOPWM(19) */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* MXT_PROCI_GRIPFACE(20) */ + 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50, 0x28, 0x04, + 0x0f, 0x0a, + /* MXT_PROCG_NOISE(22) */ + 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x23, 0x00, + 0x00, 0x05, 0x0f, 0x19, 0x23, 0x2d, 0x03, + /* MXT_TOUCH_PROXIMITY(23) */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, + /* MXT_PROCI_ONETOUCH(24) */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* MXT_SPT_SELFTEST(25) */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + /* MXT_PROCI_TWOTOUCH(27) */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* MXT_SPT_CTECONFIG(28) */ + 0x00, 0x00, 0x02, 0x08, 0x10, 0x00, +}; + +static struct mxt_platform_data mxt_platform_data = { + .config = mxt_init_vals, + .config_length = ARRAY_SIZE(mxt_init_vals), + + .x_line = 18, + .y_line = 11, + .x_size = 1024, + .y_size = 600, + .blen = 0x1, + .threshold = 0x28, + .voltage = 2800000, /* 2.8V */ + .orient = MXT_DIAGONAL_COUNTER, + .irqflags = IRQF_TRIGGER_FALLING, +}; + +static struct s3c2410_platform_i2c i2c3_data __initdata = { + .flags = 0, + .bus_num = 3, + .slave_addr = 0x10, + .frequency = 400 * 1000, + .sda_delay = 100, +}; + +static struct i2c_board_info i2c3_devs[] __initdata = { + { + I2C_BOARD_INFO("atmel_mxt_ts", 0x4a), + .platform_data = &mxt_platform_data, + .irq = IRQ_EINT(4), + }, +}; + +static void __init nuri_tsp_init(void) +{ + int gpio; + + /* TOUCH_INT: XEINT_4 */ + gpio = EXYNOS4_GPX0(4); + gpio_request(gpio, "TOUCH_INT"); + s3c_gpio_cfgpin(gpio, S3C_GPIO_SFN(0xf)); + s3c_gpio_setpull(gpio, S3C_GPIO_PULL_UP); +} + /* GPIO I2C 5 (PMIC) */ static struct i2c_board_info i2c5_devs[] __initdata = { /* max8997, To be updated */ }; +/* USB EHCI */ +static struct s5p_ehci_platdata nuri_ehci_pdata; + +static void __init nuri_ehci_init(void) +{ + struct s5p_ehci_platdata *pdata = &nuri_ehci_pdata; + + s5p_ehci_set_platdata(pdata); +} + static struct platform_device *nuri_devices[] __initdata = { /* Samsung Platform Devices */ &emmc_fixed_voltage, @@ -270,6 +367,8 @@ static struct platform_device *nuri_devices[] __initdata = { &s3c_device_hsmmc3, &s3c_device_wdt, &s3c_device_timer[0], + &s5p_device_ehci, + &s3c_device_i2c3, /* NURI Devices */ &nuri_gpio_keys, @@ -287,10 +386,16 @@ static void __init nuri_map_io(void) static void __init nuri_machine_init(void) { nuri_sdhci_init(); + nuri_tsp_init(); i2c_register_board_info(1, i2c1_devs, ARRAY_SIZE(i2c1_devs)); + s3c_i2c3_set_platdata(&i2c3_data); + i2c_register_board_info(3, i2c3_devs, ARRAY_SIZE(i2c3_devs)); i2c_register_board_info(5, i2c5_devs, ARRAY_SIZE(i2c5_devs)); + nuri_ehci_init(); + clk_xusbxti.rate = 24000000; + /* Last */ platform_add_devices(nuri_devices, ARRAY_SIZE(nuri_devices)); } diff --git a/arch/arm/mach-exynos4/platsmp.c b/arch/arm/mach-exynos4/platsmp.c index 6d35878ec1aa..c5e65a02be8d 100644 --- a/arch/arm/mach-exynos4/platsmp.c +++ b/arch/arm/mach-exynos4/platsmp.c @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -104,7 +105,7 @@ int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle) * the boot monitor to read the system wide flags register, * and branch to the address found there. */ - smp_cross_call(cpumask_of(cpu), 1); + gic_raise_softirq(cpumask_of(cpu), 1); timeout = jiffies + (1 * HZ); while (time_before(jiffies, timeout)) { @@ -147,6 +148,8 @@ void __init smp_init_cpus(void) for (i = 0; i < ncores; i++) set_cpu_possible(i, true); + + set_smp_cross_call(gic_raise_softirq); } void __init platform_smp_prepare_cpus(unsigned int max_cpus) diff --git a/arch/arm/mach-exynos4/pm.c b/arch/arm/mach-exynos4/pm.c index 10d917d9e3ad..8755ca8dd48d 100644 --- a/arch/arm/mach-exynos4/pm.c +++ b/arch/arm/mach-exynos4/pm.c @@ -16,6 +16,7 @@ #include #include +#include #include #include @@ -372,7 +373,27 @@ void exynos4_scu_enable(void __iomem *scu_base) flush_cache_all(); } -static int exynos4_pm_resume(struct sys_device *dev) +static struct sysdev_driver exynos4_pm_driver = { + .add = exynos4_pm_add, +}; + +static __init int exynos4_pm_drvinit(void) +{ + unsigned int tmp; + + s3c_pm_init(); + + /* All wakeup disable */ + + tmp = __raw_readl(S5P_WAKEUP_MASK); + tmp |= ((0xFF << 8) | (0x1F << 1)); + __raw_writel(tmp, S5P_WAKEUP_MASK); + + return sysdev_driver_register(&exynos4_sysclass, &exynos4_pm_driver); +} +arch_initcall(exynos4_pm_drvinit); + +static void exynos4_pm_resume(void) { /* For release retention */ @@ -394,27 +415,15 @@ static int exynos4_pm_resume(struct sys_device *dev) /* enable L2X0*/ writel_relaxed(1, S5P_VA_L2CC + L2X0_CTRL); #endif - - return 0; } -static struct sysdev_driver exynos4_pm_driver = { - .add = exynos4_pm_add, +static struct syscore_ops exynos4_pm_syscore_ops = { .resume = exynos4_pm_resume, }; -static __init int exynos4_pm_drvinit(void) +static __init int exynos4_pm_syscore_init(void) { - unsigned int tmp; - - s3c_pm_init(); - - /* All wakeup disable */ - - tmp = __raw_readl(S5P_WAKEUP_MASK); - tmp |= ((0xFF << 8) | (0x1F << 1)); - __raw_writel(tmp, S5P_WAKEUP_MASK); - - return sysdev_driver_register(&exynos4_sysclass, &exynos4_pm_driver); + register_syscore_ops(&exynos4_pm_syscore_ops); + return 0; } -arch_initcall(exynos4_pm_drvinit); +arch_initcall(exynos4_pm_syscore_init); diff --git a/arch/arm/mach-exynos4/setup-usb-phy.c b/arch/arm/mach-exynos4/setup-usb-phy.c new file mode 100644 index 000000000000..0883c1b824b9 --- /dev/null +++ b/arch/arm/mach-exynos4/setup-usb-phy.c @@ -0,0 +1,136 @@ +/* + * Copyright (C) 2011 Samsung Electronics Co.Ltd + * Author: Joonyoung Shim + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int exynos4_usb_phy1_init(struct platform_device *pdev) +{ + struct clk *otg_clk; + struct clk *xusbxti_clk; + u32 phyclk; + u32 rstcon; + int err; + + otg_clk = clk_get(&pdev->dev, "otg"); + if (IS_ERR(otg_clk)) { + dev_err(&pdev->dev, "Failed to get otg clock\n"); + return PTR_ERR(otg_clk); + } + + err = clk_enable(otg_clk); + if (err) { + clk_put(otg_clk); + return err; + } + + writel(readl(S5P_USBHOST_PHY_CONTROL) | S5P_USBHOST_PHY_ENABLE, + S5P_USBHOST_PHY_CONTROL); + + /* set clock frequency for PLL */ + phyclk = readl(EXYNOS4_PHYCLK) & ~CLKSEL_MASK; + + xusbxti_clk = clk_get(&pdev->dev, "xusbxti"); + if (xusbxti_clk && !IS_ERR(xusbxti_clk)) { + switch (clk_get_rate(xusbxti_clk)) { + case 12 * MHZ: + phyclk |= CLKSEL_12M; + break; + case 24 * MHZ: + phyclk |= CLKSEL_24M; + break; + default: + case 48 * MHZ: + /* default reference clock */ + break; + } + clk_put(xusbxti_clk); + } + + writel(phyclk, EXYNOS4_PHYCLK); + + /* floating prevention logic: disable */ + writel((readl(EXYNOS4_PHY1CON) | FPENABLEN), EXYNOS4_PHY1CON); + + /* set to normal HSIC 0 and 1 of PHY1 */ + writel((readl(EXYNOS4_PHYPWR) & ~PHY1_HSIC_NORMAL_MASK), + EXYNOS4_PHYPWR); + + /* set to normal standard USB of PHY1 */ + writel((readl(EXYNOS4_PHYPWR) & ~PHY1_STD_NORMAL_MASK), EXYNOS4_PHYPWR); + + /* reset all ports of both PHY and Link */ + rstcon = readl(EXYNOS4_RSTCON) | HOST_LINK_PORT_SWRST_MASK | + PHY1_SWRST_MASK; + writel(rstcon, EXYNOS4_RSTCON); + udelay(10); + + rstcon &= ~(HOST_LINK_PORT_SWRST_MASK | PHY1_SWRST_MASK); + writel(rstcon, EXYNOS4_RSTCON); + udelay(50); + + clk_disable(otg_clk); + clk_put(otg_clk); + + return 0; +} + +static int exynos4_usb_phy1_exit(struct platform_device *pdev) +{ + struct clk *otg_clk; + int err; + + otg_clk = clk_get(&pdev->dev, "otg"); + if (IS_ERR(otg_clk)) { + dev_err(&pdev->dev, "Failed to get otg clock\n"); + return PTR_ERR(otg_clk); + } + + err = clk_enable(otg_clk); + if (err) { + clk_put(otg_clk); + return err; + } + + writel((readl(EXYNOS4_PHYPWR) | PHY1_STD_ANALOG_POWERDOWN), + EXYNOS4_PHYPWR); + + writel(readl(S5P_USBHOST_PHY_CONTROL) & ~S5P_USBHOST_PHY_ENABLE, + S5P_USBHOST_PHY_CONTROL); + + clk_disable(otg_clk); + clk_put(otg_clk); + + return 0; +} + +int s5p_usb_phy_init(struct platform_device *pdev, int type) +{ + if (type == S5P_USB_PHY_HOST) + return exynos4_usb_phy1_init(pdev); + + return -EINVAL; +} + +int s5p_usb_phy_exit(struct platform_device *pdev, int type) +{ + if (type == S5P_USB_PHY_HOST) + return exynos4_usb_phy1_exit(pdev); + + return -EINVAL; +} diff --git a/arch/arm/mach-exynos4/time.c b/arch/arm/mach-exynos4/time.c index 86b9fa0d3639..ebb8f38d5405 100644 --- a/arch/arm/mach-exynos4/time.c +++ b/arch/arm/mach-exynos4/time.c @@ -206,6 +206,7 @@ static cycle_t exynos4_pwm4_read(struct clocksource *cs) return (cycle_t) ~__raw_readl(S3C_TIMERREG(0x40)); } +#ifdef CONFIG_PM static void exynos4_pwm4_resume(struct clocksource *cs) { unsigned long pclk; @@ -218,6 +219,7 @@ static void exynos4_pwm4_resume(struct clocksource *cs) exynos4_pwm_init(4, ~0); exynos4_pwm_start(4, 1); } +#endif struct clocksource pwm_clocksource = { .name = "pwm_timer4", diff --git a/arch/arm/mach-footbridge/Kconfig b/arch/arm/mach-footbridge/Kconfig index bdd257921cfb..46adca068f2c 100644 --- a/arch/arm/mach-footbridge/Kconfig +++ b/arch/arm/mach-footbridge/Kconfig @@ -4,6 +4,7 @@ menu "Footbridge Implementations" config ARCH_CATS bool "CATS" + select CLKSRC_I8253 select FOOTBRIDGE_HOST select ISA select ISA_DMA @@ -59,6 +60,7 @@ config ARCH_EBSA285_HOST config ARCH_NETWINDER bool "NetWinder" + select CLKSRC_I8253 select FOOTBRIDGE_HOST select ISA select ISA_DMA diff --git a/arch/arm/mach-footbridge/dc21285-timer.c b/arch/arm/mach-footbridge/dc21285-timer.c index 5f1f9867fc70..121ad1d4fa39 100644 --- a/arch/arm/mach-footbridge/dc21285-timer.c +++ b/arch/arm/mach-footbridge/dc21285-timer.c @@ -103,6 +103,7 @@ static void __init footbridge_timer_init(void) clockevents_calc_mult_shift(ce, mem_fclk_21285, 5); ce->max_delta_ns = clockevent_delta2ns(0xffffff, ce); ce->min_delta_ns = clockevent_delta2ns(0x000004, ce); + ce->cpumask = cpumask_of(smp_processor_id()); clockevents_register_device(ce); } diff --git a/arch/arm/mach-footbridge/include/mach/debug-macro.S b/arch/arm/mach-footbridge/include/mach/debug-macro.S index 30b971d65815..1be2eeb7a0a0 100644 --- a/arch/arm/mach-footbridge/include/mach/debug-macro.S +++ b/arch/arm/mach-footbridge/include/mach/debug-macro.S @@ -26,6 +26,7 @@ #include #else +#include /* For EBSA285 debugging */ .equ dc21285_high, ARMCSR_BASE & 0xff000000 .equ dc21285_low, ARMCSR_BASE & 0x00ffffff @@ -36,8 +37,8 @@ .else mov \rp, #0 .endif - orr \rv, \rp, #0x42000000 - orr \rp, \rp, #dc21285_high + orr \rv, \rp, #dc21285_high + orr \rp, \rp, #0x42000000 .endm .macro senduart,rd,rx diff --git a/arch/arm/mach-footbridge/isa-timer.c b/arch/arm/mach-footbridge/isa-timer.c index 441c6ce0d555..7020f1a3feca 100644 --- a/arch/arm/mach-footbridge/isa-timer.c +++ b/arch/arm/mach-footbridge/isa-timer.c @@ -10,53 +10,16 @@ #include #include #include +#include #include #include - +#include #include #include "common.h" -#define PIT_MODE 0x43 -#define PIT_CH0 0x40 - -#define PIT_LATCH ((PIT_TICK_RATE + HZ / 2) / HZ) - -static cycle_t pit_read(struct clocksource *cs) -{ - unsigned long flags; - static int old_count; - static u32 old_jifs; - int count; - u32 jifs; - - raw_local_irq_save(flags); - - jifs = jiffies; - outb_p(0x00, PIT_MODE); /* latch the count */ - count = inb_p(PIT_CH0); /* read the latched count */ - count |= inb_p(PIT_CH0) << 8; - - if (count > old_count && jifs == old_jifs) - count = old_count; - - old_count = count; - old_jifs = jifs; - - raw_local_irq_restore(flags); - - count = (PIT_LATCH - 1) - count; - - return (cycle_t)(jifs * PIT_LATCH) + count; -} - -static struct clocksource pit_cs = { - .name = "pit", - .rating = 110, - .read = pit_read, - .mask = CLOCKSOURCE_MASK(32), -}; +DEFINE_RAW_SPINLOCK(i8253_lock); static void pit_set_mode(enum clock_event_mode mode, struct clock_event_device *evt) @@ -121,7 +84,7 @@ static void __init isa_timer_init(void) pit_ce.max_delta_ns = clockevent_delta2ns(0x7fff, &pit_ce); pit_ce.min_delta_ns = clockevent_delta2ns(0x000f, &pit_ce); - clocksource_register_hz(&pit_cs, PIT_TICK_RATE); + clocksource_i8253_init(); setup_irq(pit_ce.irq, &pit_timer_irq); clockevents_register_device(&pit_ce); diff --git a/arch/arm/mach-gemini/board-wbd111.c b/arch/arm/mach-gemini/board-wbd111.c index af7b68a6b258..88cc422ee444 100644 --- a/arch/arm/mach-gemini/board-wbd111.c +++ b/arch/arm/mach-gemini/board-wbd111.c @@ -84,7 +84,6 @@ static struct sys_timer wbd111_timer = { .init = gemini_timer_init, }; -#ifdef CONFIG_MTD_PARTITIONS static struct mtd_partition wbd111_partitions[] = { { .name = "RedBoot", @@ -116,11 +115,7 @@ static struct mtd_partition wbd111_partitions[] = { .mask_flags = MTD_WRITEABLE, } }; -#define wbd111_num_partitions ARRAY_SIZE(wbd111_partitions) -#else -#define wbd111_partitions NULL -#define wbd111_num_partitions 0 -#endif /* CONFIG_MTD_PARTITIONS */ +#define wbd111_num_partitions ARRAY_SIZE(wbd111_partitions) static void __init wbd111_init(void) { diff --git a/arch/arm/mach-gemini/board-wbd222.c b/arch/arm/mach-gemini/board-wbd222.c index 99e5bbecf923..3a220347bc88 100644 --- a/arch/arm/mach-gemini/board-wbd222.c +++ b/arch/arm/mach-gemini/board-wbd222.c @@ -84,7 +84,6 @@ static struct sys_timer wbd222_timer = { .init = gemini_timer_init, }; -#ifdef CONFIG_MTD_PARTITIONS static struct mtd_partition wbd222_partitions[] = { { .name = "RedBoot", @@ -116,11 +115,7 @@ static struct mtd_partition wbd222_partitions[] = { .mask_flags = MTD_WRITEABLE, } }; -#define wbd222_num_partitions ARRAY_SIZE(wbd222_partitions) -#else -#define wbd222_partitions NULL -#define wbd222_num_partitions 0 -#endif /* CONFIG_MTD_PARTITIONS */ +#define wbd222_num_partitions ARRAY_SIZE(wbd222_partitions) static void __init wbd222_init(void) { diff --git a/arch/arm/mach-gemini/include/mach/uncompress.h b/arch/arm/mach-gemini/include/mach/uncompress.h index 5483f61a8061..0efa26247235 100644 --- a/arch/arm/mach-gemini/include/mach/uncompress.h +++ b/arch/arm/mach-gemini/include/mach/uncompress.h @@ -16,7 +16,7 @@ #include #include -static volatile unsigned long *UART = (unsigned long *)GEMINI_UART_BASE; +static volatile unsigned long * const UART = (unsigned long *)GEMINI_UART_BASE; /* * The following code assumes the serial port has already been diff --git a/arch/arm/mach-h720x/include/mach/memory.h b/arch/arm/mach-h720x/include/mach/memory.h index 9d3687651462..b0b3baec9acf 100644 --- a/arch/arm/mach-h720x/include/mach/memory.h +++ b/arch/arm/mach-h720x/include/mach/memory.h @@ -13,7 +13,6 @@ * There should not be more than (0xd0000000 - 0xc0000000) * bytes of RAM. */ -#define ISA_DMA_THRESHOLD (PHYS_OFFSET + SZ_256M - 1) -#define MAX_DMA_ADDRESS (PAGE_OFFSET + SZ_256M) +#define ARM_DMA_ZONE_SIZE SZ_256M #endif diff --git a/arch/arm/mach-imx/Kconfig b/arch/arm/mach-imx/Kconfig index 56b930a13443..59c97a331136 100644 --- a/arch/arm/mach-imx/Kconfig +++ b/arch/arm/mach-imx/Kconfig @@ -1,5 +1,15 @@ config IMX_HAVE_DMA_V1 bool +# +# ARCH_MX31 and ARCH_MX35 are left for compatibility +# Some usages assume that having one of them implies not having (e.g.) ARCH_MX2. +# To easily distinguish good and reviewed from unreviewed usages new (and IMHO +# more sensible) names are used: SOC_IMX31 and SOC_IMX35 +config ARCH_MX31 + bool + +config ARCH_MX35 + bool config SOC_IMX1 bool @@ -31,6 +41,24 @@ config SOC_IMX27 select IMX_HAVE_IOMUX_V1 select MXC_AVIC +config SOC_IMX31 + bool + select CPU_V6 + select IMX_HAVE_PLATFORM_MXC_RNGA + select ARCH_MXC_AUDMUX_V2 + select ARCH_MX31 + select MXC_AVIC + +config SOC_IMX35 + bool + select CPU_V6 + select ARCH_MXC_IOMUX_V3 + select ARCH_MXC_AUDMUX_V2 + select HAVE_EPIT + select ARCH_MX35 + select MXC_AVIC + + if ARCH_MX1 comment "MX1 platforms:" @@ -40,6 +68,7 @@ config MACH_MXLADS config ARCH_MX1ADS bool "MX1ADS platform" select MACH_MXLADS + select SOC_IMX1 select IMX_HAVE_PLATFORM_IMX_I2C select IMX_HAVE_PLATFORM_IMX_UART help @@ -51,6 +80,13 @@ config MACH_SCB9328 help Say Y here if you are using a Synertronixx scb9328 board +config MACH_APF9328 + bool "APF9328" + select SOC_IMX1 + select IMX_HAVE_PLATFORM_IMX_UART + help + Say Yes here if you are using the Armadeus APF9328 development board + endif if ARCH_MX2 @@ -129,6 +165,7 @@ choice config MACH_EUKREA_MBIMXSD25_BASEBOARD bool "Eukrea MBIMXSD development board" + select IMX_HAVE_PLATFORM_GPIO_KEYS select IMX_HAVE_PLATFORM_IMX_SSI help This adds board specific devices that can be found on Eukrea's @@ -254,6 +291,7 @@ config MACH_MX27_3DS config MACH_IMX27_VISSTRIM_M10 bool "Vista Silicon i.MX27 Visstrim_m10" select SOC_IMX27 + select IMX_HAVE_PLATFORM_GPIO_KEYS select IMX_HAVE_PLATFORM_IMX_I2C select IMX_HAVE_PLATFORM_IMX_SSI select IMX_HAVE_PLATFORM_IMX_UART @@ -314,3 +352,251 @@ config MACH_IMX27IPCAM configurations for the board and its peripherals. endif + +if ARCH_MX3 + +comment "MX31 platforms:" + +config MACH_MX31ADS + bool "Support MX31ADS platforms" + select SOC_IMX31 + select IMX_HAVE_PLATFORM_IMX_I2C + select IMX_HAVE_PLATFORM_IMX_SSI + select IMX_HAVE_PLATFORM_IMX_UART + default y + help + Include support for MX31ADS platform. This includes specific + configurations for the board and its peripherals. + +config MACH_MX31ADS_WM1133_EV1 + bool "Support Wolfson Microelectronics 1133-EV1 module" + depends on MACH_MX31ADS + depends on MFD_WM8350_I2C + depends on REGULATOR_WM8350 + select MFD_WM8350_CONFIG_MODE_0 + select MFD_WM8352_CONFIG_MODE_0 + help + Include support for the Wolfson Microelectronics 1133-EV1 PMU + and audio module for the MX31ADS platform. + +config MACH_MX31LILLY + bool "Support MX31 LILLY-1131 platforms (INCO startec)" + select SOC_IMX31 + select IMX_HAVE_PLATFORM_IMX_UART + select IMX_HAVE_PLATFORM_IPU_CORE + select IMX_HAVE_PLATFORM_MXC_EHCI + select IMX_HAVE_PLATFORM_MXC_MMC + select IMX_HAVE_PLATFORM_SPI_IMX + select MXC_ULPI if USB_ULPI + help + Include support for mx31 based LILLY1131 modules. This includes + specific configurations for the board and its peripherals. + +config MACH_MX31LITE + bool "Support MX31 LITEKIT (LogicPD)" + select SOC_IMX31 + select MXC_ULPI if USB_ULPI + select IMX_HAVE_PLATFORM_IMX2_WDT + select IMX_HAVE_PLATFORM_IMX_UART + select IMX_HAVE_PLATFORM_MXC_EHCI + select IMX_HAVE_PLATFORM_MXC_MMC + select IMX_HAVE_PLATFORM_MXC_NAND + select IMX_HAVE_PLATFORM_MXC_RTC + select IMX_HAVE_PLATFORM_SPI_IMX + help + Include support for MX31 LITEKIT platform. This includes specific + configurations for the board and its peripherals. + +config MACH_PCM037 + bool "Support Phytec pcm037 (i.MX31) platforms" + select SOC_IMX31 + select IMX_HAVE_PLATFORM_FSL_USB2_UDC + select IMX_HAVE_PLATFORM_IMX2_WDT + select IMX_HAVE_PLATFORM_IMX_I2C + select IMX_HAVE_PLATFORM_IMX_UART + select IMX_HAVE_PLATFORM_IPU_CORE + select IMX_HAVE_PLATFORM_MXC_EHCI + select IMX_HAVE_PLATFORM_MXC_MMC + select IMX_HAVE_PLATFORM_MXC_NAND + select IMX_HAVE_PLATFORM_MXC_W1 + select MXC_ULPI if USB_ULPI + help + Include support for Phytec pcm037 platform. This includes + specific configurations for the board and its peripherals. + +config MACH_PCM037_EET + bool "Support pcm037 EET board extensions" + depends on MACH_PCM037 + select IMX_HAVE_PLATFORM_GPIO_KEYS + select IMX_HAVE_PLATFORM_SPI_IMX + help + Add support for PCM037 EET baseboard extensions. If you are using the + OLED display with EET, use "video=mx3fb:CMEL-OLED" kernel + command-line parameter. + +config MACH_MX31_3DS + bool "Support MX31PDK (3DS)" + select SOC_IMX31 + select MXC_DEBUG_BOARD + select IMX_HAVE_PLATFORM_FSL_USB2_UDC + select IMX_HAVE_PLATFORM_IMX2_WDT + select IMX_HAVE_PLATFORM_IMX_I2C + select IMX_HAVE_PLATFORM_IMX_KEYPAD + select IMX_HAVE_PLATFORM_IMX_UART + select IMX_HAVE_PLATFORM_IPU_CORE + select IMX_HAVE_PLATFORM_MXC_EHCI + select IMX_HAVE_PLATFORM_MXC_NAND + select IMX_HAVE_PLATFORM_SPI_IMX + select MXC_ULPI if USB_ULPI + help + Include support for MX31PDK (3DS) platform. This includes specific + configurations for the board and its peripherals. + +config MACH_MX31_3DS_MXC_NAND_USE_BBT + bool "Make the MXC NAND driver use the in flash Bad Block Table" + depends on MACH_MX31_3DS + depends on MTD_NAND_MXC + help + Enable this if you want that the MXC NAND driver uses the in flash + Bad Block Table to know what blocks are bad instead of scanning the + entire flash looking for bad block markers. + +config MACH_MX31MOBOARD + bool "Support mx31moboard platforms (EPFL Mobots group)" + select SOC_IMX31 + select IMX_HAVE_PLATFORM_FSL_USB2_UDC + select IMX_HAVE_PLATFORM_IMX_I2C + select IMX_HAVE_PLATFORM_IMX_UART + select IMX_HAVE_PLATFORM_IPU_CORE + select IMX_HAVE_PLATFORM_MXC_EHCI + select IMX_HAVE_PLATFORM_MXC_MMC + select IMX_HAVE_PLATFORM_SPI_IMX + select MXC_ULPI if USB_ULPI + help + Include support for mx31moboard platform. This includes specific + configurations for the board and its peripherals. + +config MACH_QONG + bool "Support Dave/DENX QongEVB-LITE platform" + select SOC_IMX31 + select IMX_HAVE_PLATFORM_IMX_UART + help + Include support for Dave/DENX QongEVB-LITE platform. This includes + specific configurations for the board and its peripherals. + +config MACH_ARMADILLO5X0 + bool "Support Atmark Armadillo-500 Development Base Board" + select SOC_IMX31 + select IMX_HAVE_PLATFORM_GPIO_KEYS + select IMX_HAVE_PLATFORM_IMX_I2C + select IMX_HAVE_PLATFORM_IMX_UART + select IMX_HAVE_PLATFORM_IPU_CORE + select IMX_HAVE_PLATFORM_MXC_EHCI + select IMX_HAVE_PLATFORM_MXC_MMC + select IMX_HAVE_PLATFORM_MXC_NAND + select MXC_ULPI if USB_ULPI + help + Include support for Atmark Armadillo-500 platform. This includes + specific configurations for the board and its peripherals. + +config MACH_KZM_ARM11_01 + bool "Support KZM-ARM11-01(Kyoto Microcomputer)" + select SOC_IMX31 + select IMX_HAVE_PLATFORM_IMX_UART + help + Include support for KZM-ARM11-01. This includes specific + configurations for the board and its peripherals. + +config MACH_BUG + bool "Support Buglabs BUGBase platform" + select SOC_IMX31 + select IMX_HAVE_PLATFORM_IMX_UART + default y + help + Include support for BUGBase 1.3 platform. This includes specific + configurations for the board and its peripherals. + +comment "MX35 platforms:" + +config MACH_PCM043 + bool "Support Phytec pcm043 (i.MX35) platforms" + select SOC_IMX35 + select IMX_HAVE_PLATFORM_FLEXCAN + select IMX_HAVE_PLATFORM_FSL_USB2_UDC + select IMX_HAVE_PLATFORM_IMX2_WDT + select IMX_HAVE_PLATFORM_IMX_I2C + select IMX_HAVE_PLATFORM_IMX_SSI + select IMX_HAVE_PLATFORM_IMX_UART + select IMX_HAVE_PLATFORM_IPU_CORE + select IMX_HAVE_PLATFORM_MXC_EHCI + select IMX_HAVE_PLATFORM_MXC_NAND + select IMX_HAVE_PLATFORM_SDHCI_ESDHC_IMX + select MXC_ULPI if USB_ULPI + help + Include support for Phytec pcm043 platform. This includes + specific configurations for the board and its peripherals. + +config MACH_MX35_3DS + bool "Support MX35PDK platform" + select SOC_IMX35 + select MXC_DEBUG_BOARD + select IMX_HAVE_PLATFORM_FSL_USB2_UDC + select IMX_HAVE_PLATFORM_IMX2_WDT + select IMX_HAVE_PLATFORM_IMX_I2C + select IMX_HAVE_PLATFORM_IMX_UART + select IMX_HAVE_PLATFORM_MXC_EHCI + select IMX_HAVE_PLATFORM_MXC_NAND + select IMX_HAVE_PLATFORM_SDHCI_ESDHC_IMX + help + Include support for MX35PDK platform. This includes specific + configurations for the board and its peripherals. + +config MACH_EUKREA_CPUIMX35 + bool "Support Eukrea CPUIMX35 Platform" + select SOC_IMX35 + select IMX_HAVE_PLATFORM_FLEXCAN + select IMX_HAVE_PLATFORM_FSL_USB2_UDC + select IMX_HAVE_PLATFORM_IMX2_WDT + select IMX_HAVE_PLATFORM_IMX_I2C + select IMX_HAVE_PLATFORM_IMX_UART + select IMX_HAVE_PLATFORM_MXC_EHCI + select IMX_HAVE_PLATFORM_MXC_NAND + select IMX_HAVE_PLATFORM_SDHCI_ESDHC_IMX + select MXC_ULPI if USB_ULPI + help + Include support for Eukrea CPUIMX35 platform. This includes + specific configurations for the board and its peripherals. + +choice + prompt "Baseboard" + depends on MACH_EUKREA_CPUIMX35 + default MACH_EUKREA_MBIMXSD35_BASEBOARD + +config MACH_EUKREA_MBIMXSD35_BASEBOARD + bool "Eukrea MBIMXSD development board" + select IMX_HAVE_PLATFORM_GPIO_KEYS + select IMX_HAVE_PLATFORM_IMX_SSI + select IMX_HAVE_PLATFORM_IPU_CORE + help + This adds board specific devices that can be found on Eukrea's + MBIMXSD evaluation board. + +endchoice + +config MACH_VPR200 + bool "Support VPR200 platform" + select SOC_IMX35 + select IMX_HAVE_PLATFORM_FSL_USB2_UDC + select IMX_HAVE_PLATFORM_GPIO_KEYS + select IMX_HAVE_PLATFORM_IMX2_WDT + select IMX_HAVE_PLATFORM_IMX_UART + select IMX_HAVE_PLATFORM_IMX_I2C + select IMX_HAVE_PLATFORM_IPU_CORE + select IMX_HAVE_PLATFORM_MXC_EHCI + select IMX_HAVE_PLATFORM_MXC_NAND + select IMX_HAVE_PLATFORM_SDHCI_ESDHC_IMX + help + Include support for VPR200 platform. This includes specific + configurations for the board and its peripherals. + +endif diff --git a/arch/arm/mach-imx/Makefile b/arch/arm/mach-imx/Makefile index b85794d27991..e9eb36dad888 100644 --- a/arch/arm/mach-imx/Makefile +++ b/arch/arm/mach-imx/Makefile @@ -1,9 +1,3 @@ -# -# Makefile for the linux kernel. -# - -# Object file lists. - obj-$(CONFIG_IMX_HAVE_DMA_V1) += dma-v1.o obj-$(CONFIG_ARCH_MX1) += clock-imx1.o mm-imx1.o @@ -14,18 +8,27 @@ obj-$(CONFIG_ARCH_MX25) += clock-imx25.o mm-imx25.o ehci-imx25.o obj-$(CONFIG_MACH_MX27) += cpu-imx27.o pm-imx27.o obj-$(CONFIG_MACH_MX27) += clock-imx27.o mm-imx27.o ehci-imx27.o -# Support for CMOS sensor interface -obj-$(CONFIG_MX1_VIDEO) += mx1-camera-fiq.o mx1-camera-fiq-ksym.o +obj-$(CONFIG_SOC_IMX31) += mm-imx31.o cpu-imx31.o clock-imx31.o iomux-imx31.o ehci-imx31.o +obj-$(CONFIG_SOC_IMX35) += mm-imx35.o cpu-imx35.o clock-imx35.o ehci-imx35.o +obj-$(CONFIG_CACHE_L2X0) += cache-l2x0.o +# Support for CMOS sensor interface +obj-$(CONFIG_MX1_VIDEO) += mx1-camera-fiq.o mx1-camera-fiq-ksym.o + +# i.MX1 based machines obj-$(CONFIG_ARCH_MX1ADS) += mach-mx1ads.o obj-$(CONFIG_MACH_SCB9328) += mach-scb9328.o +obj-$(CONFIG_MACH_APF9328) += mach-apf9328.o +# i.MX21 based machines obj-$(CONFIG_MACH_MX21ADS) += mach-mx21ads.o +# i.MX25 based machines obj-$(CONFIG_MACH_MX25_3DS) += mach-mx25_3ds.o obj-$(CONFIG_MACH_EUKREA_CPUIMX25) += mach-eukrea_cpuimx25.o obj-$(CONFIG_MACH_EUKREA_MBIMXSD25_BASEBOARD) += eukrea_mbimxsd25-baseboard.o +# i.MX27 based machines obj-$(CONFIG_MACH_MX27ADS) += mach-mx27ads.o obj-$(CONFIG_MACH_PCM038) += mach-pcm038.o obj-$(CONFIG_MACH_PCM970_BASEBOARD) += pcm970-baseboard.o @@ -37,3 +40,24 @@ obj-$(CONFIG_MACH_EUKREA_MBIMX27_BASEBOARD) += eukrea_mbimx27-baseboard.o obj-$(CONFIG_MACH_PCA100) += mach-pca100.o obj-$(CONFIG_MACH_MXT_TD60) += mach-mxt_td60.o obj-$(CONFIG_MACH_IMX27IPCAM) += mach-imx27ipcam.o + +# i.MX31 based machines +obj-$(CONFIG_MACH_MX31ADS) += mach-mx31ads.o +obj-$(CONFIG_MACH_MX31LILLY) += mach-mx31lilly.o mx31lilly-db.o +obj-$(CONFIG_MACH_MX31LITE) += mach-mx31lite.o mx31lite-db.o +obj-$(CONFIG_MACH_PCM037) += mach-pcm037.o +obj-$(CONFIG_MACH_PCM037_EET) += mach-pcm037_eet.o +obj-$(CONFIG_MACH_MX31_3DS) += mach-mx31_3ds.o +obj-$(CONFIG_MACH_MX31MOBOARD) += mach-mx31moboard.o mx31moboard-devboard.o \ + mx31moboard-marxbot.o mx31moboard-smartbot.o +obj-$(CONFIG_MACH_QONG) += mach-qong.o +obj-$(CONFIG_MACH_ARMADILLO5X0) += mach-armadillo5x0.o +obj-$(CONFIG_MACH_KZM_ARM11_01) += mach-kzm_arm11_01.o +obj-$(CONFIG_MACH_BUG) += mach-bug.o + +# i.MX35 based machines +obj-$(CONFIG_MACH_PCM043) += mach-pcm043.o +obj-$(CONFIG_MACH_MX35_3DS) += mach-mx35_3ds.o +obj-$(CONFIG_MACH_EUKREA_CPUIMX35) += mach-cpuimx35.o +obj-$(CONFIG_MACH_EUKREA_MBIMXSD35_BASEBOARD) += eukrea_mbimxsd35-baseboard.o +obj-$(CONFIG_MACH_VPR200) += mach-vpr200.o diff --git a/arch/arm/mach-imx/Makefile.boot b/arch/arm/mach-imx/Makefile.boot index 3953d60bff0b..ebee18b3884c 100644 --- a/arch/arm/mach-imx/Makefile.boot +++ b/arch/arm/mach-imx/Makefile.boot @@ -13,3 +13,7 @@ initrd_phys-$(CONFIG_ARCH_MX25) := 0x80800000 zreladdr-$(CONFIG_MACH_MX27) := 0xA0008000 params_phys-$(CONFIG_MACH_MX27) := 0xA0000100 initrd_phys-$(CONFIG_MACH_MX27) := 0xA0800000 + +zreladdr-$(CONFIG_ARCH_MX3) := 0x80008000 +params_phys-$(CONFIG_ARCH_MX3) := 0x80000100 +initrd_phys-$(CONFIG_ARCH_MX3) := 0x80800000 diff --git a/arch/arm/mach-imx/cache-l2x0.c b/arch/arm/mach-imx/cache-l2x0.c new file mode 100644 index 000000000000..69d1322add3c --- /dev/null +++ b/arch/arm/mach-imx/cache-l2x0.c @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2009-2010 Pengutronix + * Sascha Hauer + * Juergen Beisert + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License version 2 as published by the + * Free Software Foundation. + */ + +#include +#include +#include + +#include + +#include + +static int mxc_init_l2x0(void) +{ + void __iomem *l2x0_base; + void __iomem *clkctl_base; + + if (!cpu_is_mx31() && !cpu_is_mx35()) + return 0; + +/* + * First of all, we must repair broken chip settings. There are some + * i.MX35 CPUs in the wild, comming with bogus L2 cache settings. These + * misconfigured CPUs will run amok immediately when the L2 cache gets enabled. + * Workaraound is to setup the correct register setting prior enabling the + * L2 cache. This should not hurt already working CPUs, as they are using the + * same value. + */ +#define L2_MEM_VAL 0x10 + + clkctl_base = ioremap(MX35_CLKCTL_BASE_ADDR, 4096); + if (clkctl_base != NULL) { + writel(0x00000515, clkctl_base + L2_MEM_VAL); + iounmap(clkctl_base); + } else { + pr_err("L2 cache: Cannot fix timing. Trying to continue without\n"); + } + + l2x0_base = ioremap(MX3x_L2CC_BASE_ADDR, 4096); + if (IS_ERR(l2x0_base)) { + printk(KERN_ERR "remapping L2 cache area failed with %ld\n", + PTR_ERR(l2x0_base)); + return 0; + } + + l2x0_init(l2x0_base, 0x00030024, 0x00000000); + + return 0; +} +arch_initcall(mxc_init_l2x0); diff --git a/arch/arm/mach-imx/clock-imx31.c b/arch/arm/mach-imx/clock-imx31.c new file mode 100644 index 000000000000..25f343fca2b9 --- /dev/null +++ b/arch/arm/mach-imx/clock-imx31.c @@ -0,0 +1,629 @@ +/* + * Copyright 2005-2007 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright (C) 2008 by Sascha Hauer + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include "crmregs-imx31.h" + +#define PRE_DIV_MIN_FREQ 10000000 /* Minimum Frequency after Predivider */ + +static void __calc_pre_post_dividers(u32 div, u32 *pre, u32 *post) +{ + u32 min_pre, temp_pre, old_err, err; + + if (div >= 512) { + *pre = 8; + *post = 64; + } else if (div >= 64) { + min_pre = (div - 1) / 64 + 1; + old_err = 8; + for (temp_pre = 8; temp_pre >= min_pre; temp_pre--) { + err = div % temp_pre; + if (err == 0) { + *pre = temp_pre; + break; + } + err = temp_pre - err; + if (err < old_err) { + old_err = err; + *pre = temp_pre; + } + } + *post = (div + *pre - 1) / *pre; + } else if (div <= 8) { + *pre = div; + *post = 1; + } else { + *pre = 1; + *post = div; + } +} + +static struct clk mcu_pll_clk; +static struct clk serial_pll_clk; +static struct clk ipg_clk; +static struct clk ckih_clk; + +static int cgr_enable(struct clk *clk) +{ + u32 reg; + + if (!clk->enable_reg) + return 0; + + reg = __raw_readl(clk->enable_reg); + reg |= 3 << clk->enable_shift; + __raw_writel(reg, clk->enable_reg); + + return 0; +} + +static void cgr_disable(struct clk *clk) +{ + u32 reg; + + if (!clk->enable_reg) + return; + + reg = __raw_readl(clk->enable_reg); + reg &= ~(3 << clk->enable_shift); + + /* special case for EMI clock */ + if (clk->enable_reg == MXC_CCM_CGR2 && clk->enable_shift == 8) + reg |= (1 << clk->enable_shift); + + __raw_writel(reg, clk->enable_reg); +} + +static unsigned long pll_ref_get_rate(void) +{ + unsigned long ccmr; + unsigned int prcs; + + ccmr = __raw_readl(MXC_CCM_CCMR); + prcs = (ccmr & MXC_CCM_CCMR_PRCS_MASK) >> MXC_CCM_CCMR_PRCS_OFFSET; + if (prcs == 0x1) + return CKIL_CLK_FREQ * 1024; + else + return clk_get_rate(&ckih_clk); +} + +static unsigned long usb_pll_get_rate(struct clk *clk) +{ + unsigned long reg; + + reg = __raw_readl(MXC_CCM_UPCTL); + + return mxc_decode_pll(reg, pll_ref_get_rate()); +} + +static unsigned long serial_pll_get_rate(struct clk *clk) +{ + unsigned long reg; + + reg = __raw_readl(MXC_CCM_SRPCTL); + + return mxc_decode_pll(reg, pll_ref_get_rate()); +} + +static unsigned long mcu_pll_get_rate(struct clk *clk) +{ + unsigned long reg, ccmr; + + ccmr = __raw_readl(MXC_CCM_CCMR); + + if (!(ccmr & MXC_CCM_CCMR_MPE) || (ccmr & MXC_CCM_CCMR_MDS)) + return clk_get_rate(&ckih_clk); + + reg = __raw_readl(MXC_CCM_MPCTL); + + return mxc_decode_pll(reg, pll_ref_get_rate()); +} + +static int usb_pll_enable(struct clk *clk) +{ + u32 reg; + + reg = __raw_readl(MXC_CCM_CCMR); + reg |= MXC_CCM_CCMR_UPE; + __raw_writel(reg, MXC_CCM_CCMR); + + /* No lock bit on MX31, so using max time from spec */ + udelay(80); + + return 0; +} + +static void usb_pll_disable(struct clk *clk) +{ + u32 reg; + + reg = __raw_readl(MXC_CCM_CCMR); + reg &= ~MXC_CCM_CCMR_UPE; + __raw_writel(reg, MXC_CCM_CCMR); +} + +static int serial_pll_enable(struct clk *clk) +{ + u32 reg; + + reg = __raw_readl(MXC_CCM_CCMR); + reg |= MXC_CCM_CCMR_SPE; + __raw_writel(reg, MXC_CCM_CCMR); + + /* No lock bit on MX31, so using max time from spec */ + udelay(80); + + return 0; +} + +static void serial_pll_disable(struct clk *clk) +{ + u32 reg; + + reg = __raw_readl(MXC_CCM_CCMR); + reg &= ~MXC_CCM_CCMR_SPE; + __raw_writel(reg, MXC_CCM_CCMR); +} + +#define PDR0(mask, off) ((__raw_readl(MXC_CCM_PDR0) & mask) >> off) +#define PDR1(mask, off) ((__raw_readl(MXC_CCM_PDR1) & mask) >> off) +#define PDR2(mask, off) ((__raw_readl(MXC_CCM_PDR2) & mask) >> off) + +static unsigned long mcu_main_get_rate(struct clk *clk) +{ + u32 pmcr0 = __raw_readl(MXC_CCM_PMCR0); + + if ((pmcr0 & MXC_CCM_PMCR0_DFSUP1) == MXC_CCM_PMCR0_DFSUP1_SPLL) + return clk_get_rate(&serial_pll_clk); + else + return clk_get_rate(&mcu_pll_clk); +} + +static unsigned long ahb_get_rate(struct clk *clk) +{ + unsigned long max_pdf; + + max_pdf = PDR0(MXC_CCM_PDR0_MAX_PODF_MASK, + MXC_CCM_PDR0_MAX_PODF_OFFSET); + return clk_get_rate(clk->parent) / (max_pdf + 1); +} + +static unsigned long ipg_get_rate(struct clk *clk) +{ + unsigned long ipg_pdf; + + ipg_pdf = PDR0(MXC_CCM_PDR0_IPG_PODF_MASK, + MXC_CCM_PDR0_IPG_PODF_OFFSET); + return clk_get_rate(clk->parent) / (ipg_pdf + 1); +} + +static unsigned long nfc_get_rate(struct clk *clk) +{ + unsigned long nfc_pdf; + + nfc_pdf = PDR0(MXC_CCM_PDR0_NFC_PODF_MASK, + MXC_CCM_PDR0_NFC_PODF_OFFSET); + return clk_get_rate(clk->parent) / (nfc_pdf + 1); +} + +static unsigned long hsp_get_rate(struct clk *clk) +{ + unsigned long hsp_pdf; + + hsp_pdf = PDR0(MXC_CCM_PDR0_HSP_PODF_MASK, + MXC_CCM_PDR0_HSP_PODF_OFFSET); + return clk_get_rate(clk->parent) / (hsp_pdf + 1); +} + +static unsigned long usb_get_rate(struct clk *clk) +{ + unsigned long usb_pdf, usb_prepdf; + + usb_pdf = PDR1(MXC_CCM_PDR1_USB_PODF_MASK, + MXC_CCM_PDR1_USB_PODF_OFFSET); + usb_prepdf = PDR1(MXC_CCM_PDR1_USB_PRDF_MASK, + MXC_CCM_PDR1_USB_PRDF_OFFSET); + return clk_get_rate(clk->parent) / (usb_prepdf + 1) / (usb_pdf + 1); +} + +static unsigned long csi_get_rate(struct clk *clk) +{ + u32 reg, pre, post; + + reg = __raw_readl(MXC_CCM_PDR0); + pre = (reg & MXC_CCM_PDR0_CSI_PRDF_MASK) >> + MXC_CCM_PDR0_CSI_PRDF_OFFSET; + pre++; + post = (reg & MXC_CCM_PDR0_CSI_PODF_MASK) >> + MXC_CCM_PDR0_CSI_PODF_OFFSET; + post++; + return clk_get_rate(clk->parent) / (pre * post); +} + +static unsigned long csi_round_rate(struct clk *clk, unsigned long rate) +{ + u32 pre, post, parent = clk_get_rate(clk->parent); + u32 div = parent / rate; + + if (parent % rate) + div++; + + __calc_pre_post_dividers(div, &pre, &post); + + return parent / (pre * post); +} + +static int csi_set_rate(struct clk *clk, unsigned long rate) +{ + u32 reg, div, pre, post, parent = clk_get_rate(clk->parent); + + div = parent / rate; + + if ((parent / div) != rate) + return -EINVAL; + + __calc_pre_post_dividers(div, &pre, &post); + + /* Set CSI clock divider */ + reg = __raw_readl(MXC_CCM_PDR0) & + ~(MXC_CCM_PDR0_CSI_PODF_MASK | MXC_CCM_PDR0_CSI_PRDF_MASK); + reg |= (post - 1) << MXC_CCM_PDR0_CSI_PODF_OFFSET; + reg |= (pre - 1) << MXC_CCM_PDR0_CSI_PRDF_OFFSET; + __raw_writel(reg, MXC_CCM_PDR0); + + return 0; +} + +static unsigned long ssi1_get_rate(struct clk *clk) +{ + unsigned long ssi1_pdf, ssi1_prepdf; + + ssi1_pdf = PDR1(MXC_CCM_PDR1_SSI1_PODF_MASK, + MXC_CCM_PDR1_SSI1_PODF_OFFSET); + ssi1_prepdf = PDR1(MXC_CCM_PDR1_SSI1_PRE_PODF_MASK, + MXC_CCM_PDR1_SSI1_PRE_PODF_OFFSET); + return clk_get_rate(clk->parent) / (ssi1_prepdf + 1) / (ssi1_pdf + 1); +} + +static unsigned long ssi2_get_rate(struct clk *clk) +{ + unsigned long ssi2_pdf, ssi2_prepdf; + + ssi2_pdf = PDR1(MXC_CCM_PDR1_SSI2_PODF_MASK, + MXC_CCM_PDR1_SSI2_PODF_OFFSET); + ssi2_prepdf = PDR1(MXC_CCM_PDR1_SSI2_PRE_PODF_MASK, + MXC_CCM_PDR1_SSI2_PRE_PODF_OFFSET); + return clk_get_rate(clk->parent) / (ssi2_prepdf + 1) / (ssi2_pdf + 1); +} + +static unsigned long firi_get_rate(struct clk *clk) +{ + unsigned long firi_pdf, firi_prepdf; + + firi_pdf = PDR1(MXC_CCM_PDR1_FIRI_PODF_MASK, + MXC_CCM_PDR1_FIRI_PODF_OFFSET); + firi_prepdf = PDR1(MXC_CCM_PDR1_FIRI_PRE_PODF_MASK, + MXC_CCM_PDR1_FIRI_PRE_PODF_OFFSET); + return clk_get_rate(clk->parent) / (firi_prepdf + 1) / (firi_pdf + 1); +} + +static unsigned long firi_round_rate(struct clk *clk, unsigned long rate) +{ + u32 pre, post; + u32 parent = clk_get_rate(clk->parent); + u32 div = parent / rate; + + if (parent % rate) + div++; + + __calc_pre_post_dividers(div, &pre, &post); + + return parent / (pre * post); + +} + +static int firi_set_rate(struct clk *clk, unsigned long rate) +{ + u32 reg, div, pre, post, parent = clk_get_rate(clk->parent); + + div = parent / rate; + + if ((parent / div) != rate) + return -EINVAL; + + __calc_pre_post_dividers(div, &pre, &post); + + /* Set FIRI clock divider */ + reg = __raw_readl(MXC_CCM_PDR1) & + ~(MXC_CCM_PDR1_FIRI_PODF_MASK | MXC_CCM_PDR1_FIRI_PRE_PODF_MASK); + reg |= (pre - 1) << MXC_CCM_PDR1_FIRI_PRE_PODF_OFFSET; + reg |= (post - 1) << MXC_CCM_PDR1_FIRI_PODF_OFFSET; + __raw_writel(reg, MXC_CCM_PDR1); + + return 0; +} + +static unsigned long mbx_get_rate(struct clk *clk) +{ + return clk_get_rate(clk->parent) / 2; +} + +static unsigned long mstick1_get_rate(struct clk *clk) +{ + unsigned long msti_pdf; + + msti_pdf = PDR2(MXC_CCM_PDR2_MST1_PDF_MASK, + MXC_CCM_PDR2_MST1_PDF_OFFSET); + return clk_get_rate(clk->parent) / (msti_pdf + 1); +} + +static unsigned long mstick2_get_rate(struct clk *clk) +{ + unsigned long msti_pdf; + + msti_pdf = PDR2(MXC_CCM_PDR2_MST2_PDF_MASK, + MXC_CCM_PDR2_MST2_PDF_OFFSET); + return clk_get_rate(clk->parent) / (msti_pdf + 1); +} + +static unsigned long ckih_rate; + +static unsigned long clk_ckih_get_rate(struct clk *clk) +{ + return ckih_rate; +} + +static unsigned long clk_ckil_get_rate(struct clk *clk) +{ + return CKIL_CLK_FREQ; +} + +static struct clk ckih_clk = { + .get_rate = clk_ckih_get_rate, +}; + +static struct clk mcu_pll_clk = { + .parent = &ckih_clk, + .get_rate = mcu_pll_get_rate, +}; + +static struct clk mcu_main_clk = { + .parent = &mcu_pll_clk, + .get_rate = mcu_main_get_rate, +}; + +static struct clk serial_pll_clk = { + .parent = &ckih_clk, + .get_rate = serial_pll_get_rate, + .enable = serial_pll_enable, + .disable = serial_pll_disable, +}; + +static struct clk usb_pll_clk = { + .parent = &ckih_clk, + .get_rate = usb_pll_get_rate, + .enable = usb_pll_enable, + .disable = usb_pll_disable, +}; + +static struct clk ahb_clk = { + .parent = &mcu_main_clk, + .get_rate = ahb_get_rate, +}; + +#define DEFINE_CLOCK(name, i, er, es, gr, s, p) \ + static struct clk name = { \ + .id = i, \ + .enable_reg = er, \ + .enable_shift = es, \ + .get_rate = gr, \ + .enable = cgr_enable, \ + .disable = cgr_disable, \ + .secondary = s, \ + .parent = p, \ + } + +#define DEFINE_CLOCK1(name, i, er, es, getsetround, s, p) \ + static struct clk name = { \ + .id = i, \ + .enable_reg = er, \ + .enable_shift = es, \ + .get_rate = getsetround##_get_rate, \ + .set_rate = getsetround##_set_rate, \ + .round_rate = getsetround##_round_rate, \ + .enable = cgr_enable, \ + .disable = cgr_disable, \ + .secondary = s, \ + .parent = p, \ + } + +DEFINE_CLOCK(perclk_clk, 0, NULL, 0, NULL, NULL, &ipg_clk); +DEFINE_CLOCK(ckil_clk, 0, NULL, 0, clk_ckil_get_rate, NULL, NULL); + +DEFINE_CLOCK(sdhc1_clk, 0, MXC_CCM_CGR0, 0, NULL, NULL, &perclk_clk); +DEFINE_CLOCK(sdhc2_clk, 1, MXC_CCM_CGR0, 2, NULL, NULL, &perclk_clk); +DEFINE_CLOCK(gpt_clk, 0, MXC_CCM_CGR0, 4, NULL, NULL, &perclk_clk); +DEFINE_CLOCK(epit1_clk, 0, MXC_CCM_CGR0, 6, NULL, NULL, &perclk_clk); +DEFINE_CLOCK(epit2_clk, 1, MXC_CCM_CGR0, 8, NULL, NULL, &perclk_clk); +DEFINE_CLOCK(iim_clk, 0, MXC_CCM_CGR0, 10, NULL, NULL, &ipg_clk); +DEFINE_CLOCK(ata_clk, 0, MXC_CCM_CGR0, 12, NULL, NULL, &ipg_clk); +DEFINE_CLOCK(sdma_clk1, 0, MXC_CCM_CGR0, 14, NULL, NULL, &ahb_clk); +DEFINE_CLOCK(cspi3_clk, 2, MXC_CCM_CGR0, 16, NULL, NULL, &ipg_clk); +DEFINE_CLOCK(rng_clk, 0, MXC_CCM_CGR0, 18, NULL, NULL, &ipg_clk); +DEFINE_CLOCK(uart1_clk, 0, MXC_CCM_CGR0, 20, NULL, NULL, &perclk_clk); +DEFINE_CLOCK(uart2_clk, 1, MXC_CCM_CGR0, 22, NULL, NULL, &perclk_clk); +DEFINE_CLOCK(ssi1_clk, 0, MXC_CCM_CGR0, 24, ssi1_get_rate, NULL, &serial_pll_clk); +DEFINE_CLOCK(i2c1_clk, 0, MXC_CCM_CGR0, 26, NULL, NULL, &perclk_clk); +DEFINE_CLOCK(i2c2_clk, 1, MXC_CCM_CGR0, 28, NULL, NULL, &perclk_clk); +DEFINE_CLOCK(i2c3_clk, 2, MXC_CCM_CGR0, 30, NULL, NULL, &perclk_clk); + +DEFINE_CLOCK(mpeg4_clk, 0, MXC_CCM_CGR1, 0, NULL, NULL, &ahb_clk); +DEFINE_CLOCK(mstick1_clk, 0, MXC_CCM_CGR1, 2, mstick1_get_rate, NULL, &usb_pll_clk); +DEFINE_CLOCK(mstick2_clk, 1, MXC_CCM_CGR1, 4, mstick2_get_rate, NULL, &usb_pll_clk); +DEFINE_CLOCK1(csi_clk, 0, MXC_CCM_CGR1, 6, csi, NULL, &serial_pll_clk); +DEFINE_CLOCK(rtc_clk, 0, MXC_CCM_CGR1, 8, NULL, NULL, &ckil_clk); +DEFINE_CLOCK(wdog_clk, 0, MXC_CCM_CGR1, 10, NULL, NULL, &ipg_clk); +DEFINE_CLOCK(pwm_clk, 0, MXC_CCM_CGR1, 12, NULL, NULL, &perclk_clk); +DEFINE_CLOCK(usb_clk2, 0, MXC_CCM_CGR1, 18, usb_get_rate, NULL, &ahb_clk); +DEFINE_CLOCK(kpp_clk, 0, MXC_CCM_CGR1, 20, NULL, NULL, &ipg_clk); +DEFINE_CLOCK(ipu_clk, 0, MXC_CCM_CGR1, 22, hsp_get_rate, NULL, &mcu_main_clk); +DEFINE_CLOCK(uart3_clk, 2, MXC_CCM_CGR1, 24, NULL, NULL, &perclk_clk); +DEFINE_CLOCK(uart4_clk, 3, MXC_CCM_CGR1, 26, NULL, NULL, &perclk_clk); +DEFINE_CLOCK(uart5_clk, 4, MXC_CCM_CGR1, 28, NULL, NULL, &perclk_clk); +DEFINE_CLOCK(owire_clk, 0, MXC_CCM_CGR1, 30, NULL, NULL, &perclk_clk); + +DEFINE_CLOCK(ssi2_clk, 1, MXC_CCM_CGR2, 0, ssi2_get_rate, NULL, &serial_pll_clk); +DEFINE_CLOCK(cspi1_clk, 0, MXC_CCM_CGR2, 2, NULL, NULL, &ipg_clk); +DEFINE_CLOCK(cspi2_clk, 1, MXC_CCM_CGR2, 4, NULL, NULL, &ipg_clk); +DEFINE_CLOCK(mbx_clk, 0, MXC_CCM_CGR2, 6, mbx_get_rate, NULL, &ahb_clk); +DEFINE_CLOCK(emi_clk, 0, MXC_CCM_CGR2, 8, NULL, NULL, &ahb_clk); +DEFINE_CLOCK(rtic_clk, 0, MXC_CCM_CGR2, 10, NULL, NULL, &ahb_clk); +DEFINE_CLOCK1(firi_clk, 0, MXC_CCM_CGR2, 12, firi, NULL, &usb_pll_clk); + +DEFINE_CLOCK(sdma_clk2, 0, NULL, 0, NULL, NULL, &ipg_clk); +DEFINE_CLOCK(usb_clk1, 0, NULL, 0, usb_get_rate, NULL, &usb_pll_clk); +DEFINE_CLOCK(nfc_clk, 0, NULL, 0, nfc_get_rate, NULL, &ahb_clk); +DEFINE_CLOCK(scc_clk, 0, NULL, 0, NULL, NULL, &ipg_clk); +DEFINE_CLOCK(ipg_clk, 0, NULL, 0, ipg_get_rate, NULL, &ahb_clk); + +#define _REGISTER_CLOCK(d, n, c) \ + { \ + .dev_id = d, \ + .con_id = n, \ + .clk = &c, \ + }, + +static struct clk_lookup lookups[] = { + _REGISTER_CLOCK(NULL, "emi", emi_clk) + _REGISTER_CLOCK("imx31-cspi.0", NULL, cspi1_clk) + _REGISTER_CLOCK("imx31-cspi.1", NULL, cspi2_clk) + _REGISTER_CLOCK("imx31-cspi.2", NULL, cspi3_clk) + _REGISTER_CLOCK(NULL, "gpt", gpt_clk) + _REGISTER_CLOCK(NULL, "pwm", pwm_clk) + _REGISTER_CLOCK("imx2-wdt.0", NULL, wdog_clk) + _REGISTER_CLOCK(NULL, "rtc", rtc_clk) + _REGISTER_CLOCK(NULL, "epit", epit1_clk) + _REGISTER_CLOCK(NULL, "epit", epit2_clk) + _REGISTER_CLOCK("mxc_nand.0", NULL, nfc_clk) + _REGISTER_CLOCK("ipu-core", NULL, ipu_clk) + _REGISTER_CLOCK("mx3_sdc_fb", NULL, ipu_clk) + _REGISTER_CLOCK(NULL, "kpp", kpp_clk) + _REGISTER_CLOCK("mxc-ehci.0", "usb", usb_clk1) + _REGISTER_CLOCK("mxc-ehci.0", "usb_ahb", usb_clk2) + _REGISTER_CLOCK("mxc-ehci.1", "usb", usb_clk1) + _REGISTER_CLOCK("mxc-ehci.1", "usb_ahb", usb_clk2) + _REGISTER_CLOCK("mxc-ehci.2", "usb", usb_clk1) + _REGISTER_CLOCK("mxc-ehci.2", "usb_ahb", usb_clk2) + _REGISTER_CLOCK("fsl-usb2-udc", "usb", usb_clk1) + _REGISTER_CLOCK("fsl-usb2-udc", "usb_ahb", usb_clk2) + _REGISTER_CLOCK("mx3-camera.0", NULL, csi_clk) + _REGISTER_CLOCK("imx-uart.0", NULL, uart1_clk) + _REGISTER_CLOCK("imx-uart.1", NULL, uart2_clk) + _REGISTER_CLOCK("imx-uart.2", NULL, uart3_clk) + _REGISTER_CLOCK("imx-uart.3", NULL, uart4_clk) + _REGISTER_CLOCK("imx-uart.4", NULL, uart5_clk) + _REGISTER_CLOCK("imx-i2c.0", NULL, i2c1_clk) + _REGISTER_CLOCK("imx-i2c.1", NULL, i2c2_clk) + _REGISTER_CLOCK("imx-i2c.2", NULL, i2c3_clk) + _REGISTER_CLOCK("mxc_w1.0", NULL, owire_clk) + _REGISTER_CLOCK("mxc-mmc.0", NULL, sdhc1_clk) + _REGISTER_CLOCK("mxc-mmc.1", NULL, sdhc2_clk) + _REGISTER_CLOCK("imx-ssi.0", NULL, ssi1_clk) + _REGISTER_CLOCK("imx-ssi.1", NULL, ssi2_clk) + _REGISTER_CLOCK(NULL, "firi", firi_clk) + _REGISTER_CLOCK(NULL, "ata", ata_clk) + _REGISTER_CLOCK(NULL, "rtic", rtic_clk) + _REGISTER_CLOCK(NULL, "rng", rng_clk) + _REGISTER_CLOCK("imx-sdma", NULL, sdma_clk1) + _REGISTER_CLOCK(NULL, "sdma_ipg", sdma_clk2) + _REGISTER_CLOCK(NULL, "mstick", mstick1_clk) + _REGISTER_CLOCK(NULL, "mstick", mstick2_clk) + _REGISTER_CLOCK(NULL, "scc", scc_clk) + _REGISTER_CLOCK(NULL, "iim", iim_clk) + _REGISTER_CLOCK(NULL, "mpeg4", mpeg4_clk) + _REGISTER_CLOCK(NULL, "mbx", mbx_clk) +}; + +int __init mx31_clocks_init(unsigned long fref) +{ + u32 reg; + + ckih_rate = fref; + + clkdev_add_table(lookups, ARRAY_SIZE(lookups)); + + /* change the csi_clk parent if necessary */ + reg = __raw_readl(MXC_CCM_CCMR); + if (!(reg & MXC_CCM_CCMR_CSCS)) + if (clk_set_parent(&csi_clk, &usb_pll_clk)) + pr_err("%s: error changing csi_clk parent\n", __func__); + + + /* Turn off all possible clocks */ + __raw_writel((3 << 4), MXC_CCM_CGR0); + __raw_writel(0, MXC_CCM_CGR1); + __raw_writel((3 << 8) | (3 << 14) | (3 << 16)| + 1 << 27 | 1 << 28, /* Bit 27 and 28 are not defined for + MX32, but still required to be set */ + MXC_CCM_CGR2); + + /* + * Before turning off usb_pll make sure ipg_per_clk is generated + * by ipg_clk and not usb_pll. + */ + __raw_writel(__raw_readl(MXC_CCM_CCMR) | (1 << 24), MXC_CCM_CCMR); + + usb_pll_disable(&usb_pll_clk); + + pr_info("Clock input source is %ld\n", clk_get_rate(&ckih_clk)); + + clk_enable(&gpt_clk); + clk_enable(&emi_clk); + clk_enable(&iim_clk); + + clk_enable(&serial_pll_clk); + + mx31_read_cpu_rev(); + + if (mx31_revision() >= IMX_CHIP_REVISION_2_0) { + reg = __raw_readl(MXC_CCM_PMCR1); + /* No PLL restart on DVFS switch; enable auto EMI handshake */ + reg |= MXC_CCM_PMCR1_PLLRDIS | MXC_CCM_PMCR1_EMIRQ_EN; + __raw_writel(reg, MXC_CCM_PMCR1); + } + + mxc_timer_init(&ipg_clk, MX31_IO_ADDRESS(MX31_GPT1_BASE_ADDR), + MX31_INT_GPT); + + return 0; +} diff --git a/arch/arm/mach-imx/clock-imx35.c b/arch/arm/mach-imx/clock-imx35.c new file mode 100644 index 000000000000..5a4cc1ea405b --- /dev/null +++ b/arch/arm/mach-imx/clock-imx35.c @@ -0,0 +1,549 @@ +/* + * Copyright (C) 2009 by Sascha Hauer, Pengutronix + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define CCM_BASE MX35_IO_ADDRESS(MX35_CCM_BASE_ADDR) + +#define CCM_CCMR 0x00 +#define CCM_PDR0 0x04 +#define CCM_PDR1 0x08 +#define CCM_PDR2 0x0C +#define CCM_PDR3 0x10 +#define CCM_PDR4 0x14 +#define CCM_RCSR 0x18 +#define CCM_MPCTL 0x1C +#define CCM_PPCTL 0x20 +#define CCM_ACMR 0x24 +#define CCM_COSR 0x28 +#define CCM_CGR0 0x2C +#define CCM_CGR1 0x30 +#define CCM_CGR2 0x34 +#define CCM_CGR3 0x38 + +#ifdef HAVE_SET_RATE_SUPPORT +static void calc_dividers(u32 div, u32 *pre, u32 *post, u32 maxpost) +{ + u32 min_pre, temp_pre, old_err, err; + + min_pre = (div - 1) / maxpost + 1; + old_err = 8; + + for (temp_pre = 8; temp_pre >= min_pre; temp_pre--) { + if (div > (temp_pre * maxpost)) + break; + + if (div < (temp_pre * temp_pre)) + continue; + + err = div % temp_pre; + + if (err == 0) { + *pre = temp_pre; + break; + } + + err = temp_pre - err; + + if (err < old_err) { + old_err = err; + *pre = temp_pre; + } + } + + *post = (div + *pre - 1) / *pre; +} + +/* get the best values for a 3-bit divider combined with a 6-bit divider */ +static void calc_dividers_3_6(u32 div, u32 *pre, u32 *post) +{ + if (div >= 512) { + *pre = 8; + *post = 64; + } else if (div >= 64) { + calc_dividers(div, pre, post, 64); + } else if (div <= 8) { + *pre = div; + *post = 1; + } else { + *pre = 1; + *post = div; + } +} + +/* get the best values for two cascaded 3-bit dividers */ +static void calc_dividers_3_3(u32 div, u32 *pre, u32 *post) +{ + if (div >= 64) { + *pre = *post = 8; + } else if (div > 8) { + calc_dividers(div, pre, post, 8); + } else { + *pre = 1; + *post = div; + } +} +#endif + +static unsigned long get_rate_mpll(void) +{ + ulong mpctl = __raw_readl(CCM_BASE + CCM_MPCTL); + + return mxc_decode_pll(mpctl, 24000000); +} + +static unsigned long get_rate_ppll(void) +{ + ulong ppctl = __raw_readl(CCM_BASE + CCM_PPCTL); + + return mxc_decode_pll(ppctl, 24000000); +} + +struct arm_ahb_div { + unsigned char arm, ahb, sel; +}; + +static struct arm_ahb_div clk_consumer[] = { + { .arm = 1, .ahb = 4, .sel = 0}, + { .arm = 1, .ahb = 3, .sel = 1}, + { .arm = 2, .ahb = 2, .sel = 0}, + { .arm = 0, .ahb = 0, .sel = 0}, + { .arm = 0, .ahb = 0, .sel = 0}, + { .arm = 0, .ahb = 0, .sel = 0}, + { .arm = 4, .ahb = 1, .sel = 0}, + { .arm = 1, .ahb = 5, .sel = 0}, + { .arm = 1, .ahb = 8, .sel = 0}, + { .arm = 1, .ahb = 6, .sel = 1}, + { .arm = 2, .ahb = 4, .sel = 0}, + { .arm = 0, .ahb = 0, .sel = 0}, + { .arm = 0, .ahb = 0, .sel = 0}, + { .arm = 0, .ahb = 0, .sel = 0}, + { .arm = 4, .ahb = 2, .sel = 0}, + { .arm = 0, .ahb = 0, .sel = 0}, +}; + +static unsigned long get_rate_arm(void) +{ + unsigned long pdr0 = __raw_readl(CCM_BASE + CCM_PDR0); + struct arm_ahb_div *aad; + unsigned long fref = get_rate_mpll(); + + aad = &clk_consumer[(pdr0 >> 16) & 0xf]; + if (aad->sel) + fref = fref * 3 / 4; + + return fref / aad->arm; +} + +static unsigned long get_rate_ahb(struct clk *clk) +{ + unsigned long pdr0 = __raw_readl(CCM_BASE + CCM_PDR0); + struct arm_ahb_div *aad; + unsigned long fref = get_rate_arm(); + + aad = &clk_consumer[(pdr0 >> 16) & 0xf]; + + return fref / aad->ahb; +} + +static unsigned long get_rate_ipg(struct clk *clk) +{ + return get_rate_ahb(NULL) >> 1; +} + +static unsigned long get_rate_uart(struct clk *clk) +{ + unsigned long pdr3 = __raw_readl(CCM_BASE + CCM_PDR3); + unsigned long pdr4 = __raw_readl(CCM_BASE + CCM_PDR4); + unsigned long div = ((pdr4 >> 10) & 0x3f) + 1; + + if (pdr3 & (1 << 14)) + return get_rate_arm() / div; + else + return get_rate_ppll() / div; +} + +static unsigned long get_rate_sdhc(struct clk *clk) +{ + unsigned long pdr3 = __raw_readl(CCM_BASE + CCM_PDR3); + unsigned long div, rate; + + if (pdr3 & (1 << 6)) + rate = get_rate_arm(); + else + rate = get_rate_ppll(); + + switch (clk->id) { + default: + case 0: + div = pdr3 & 0x3f; + break; + case 1: + div = (pdr3 >> 8) & 0x3f; + break; + case 2: + div = (pdr3 >> 16) & 0x3f; + break; + } + + return rate / (div + 1); +} + +static unsigned long get_rate_mshc(struct clk *clk) +{ + unsigned long pdr1 = __raw_readl(CCM_BASE + CCM_PDR1); + unsigned long div1, div2, rate; + + if (pdr1 & (1 << 7)) + rate = get_rate_arm(); + else + rate = get_rate_ppll(); + + div1 = (pdr1 >> 29) & 0x7; + div2 = (pdr1 >> 22) & 0x3f; + + return rate / ((div1 + 1) * (div2 + 1)); +} + +static unsigned long get_rate_ssi(struct clk *clk) +{ + unsigned long pdr2 = __raw_readl(CCM_BASE + CCM_PDR2); + unsigned long div1, div2, rate; + + if (pdr2 & (1 << 6)) + rate = get_rate_arm(); + else + rate = get_rate_ppll(); + + switch (clk->id) { + default: + case 0: + div1 = pdr2 & 0x3f; + div2 = (pdr2 >> 24) & 0x7; + break; + case 1: + div1 = (pdr2 >> 8) & 0x3f; + div2 = (pdr2 >> 27) & 0x7; + break; + } + + return rate / ((div1 + 1) * (div2 + 1)); +} + +static unsigned long get_rate_csi(struct clk *clk) +{ + unsigned long pdr2 = __raw_readl(CCM_BASE + CCM_PDR2); + unsigned long rate; + + if (pdr2 & (1 << 7)) + rate = get_rate_arm(); + else + rate = get_rate_ppll(); + + return rate / (((pdr2 >> 16) & 0x3f) + 1); +} + +static unsigned long get_rate_otg(struct clk *clk) +{ + unsigned long pdr4 = __raw_readl(CCM_BASE + CCM_PDR4); + unsigned long rate; + + if (pdr4 & (1 << 9)) + rate = get_rate_arm(); + else + rate = get_rate_ppll(); + + return rate / (((pdr4 >> 22) & 0x3f) + 1); +} + +static unsigned long get_rate_ipg_per(struct clk *clk) +{ + unsigned long pdr0 = __raw_readl(CCM_BASE + CCM_PDR0); + unsigned long pdr4 = __raw_readl(CCM_BASE + CCM_PDR4); + unsigned long div; + + if (pdr0 & (1 << 26)) { + div = (pdr4 >> 16) & 0x3f; + return get_rate_arm() / (div + 1); + } else { + div = (pdr0 >> 12) & 0x7; + return get_rate_ahb(NULL) / (div + 1); + } +} + +static unsigned long get_rate_hsp(struct clk *clk) +{ + unsigned long hsp_podf = (__raw_readl(CCM_BASE + CCM_PDR0) >> 20) & 0x03; + unsigned long fref = get_rate_mpll(); + + if (fref > 400 * 1000 * 1000) { + switch (hsp_podf) { + case 0: + return fref >> 2; + case 1: + return fref >> 3; + case 2: + return fref / 3; + } + } else { + switch (hsp_podf) { + case 0: + case 2: + return fref / 3; + case 1: + return fref / 6; + } + } + + return 0; +} + +static int clk_cgr_enable(struct clk *clk) +{ + u32 reg; + + reg = __raw_readl(clk->enable_reg); + reg |= 3 << clk->enable_shift; + __raw_writel(reg, clk->enable_reg); + + return 0; +} + +static void clk_cgr_disable(struct clk *clk) +{ + u32 reg; + + reg = __raw_readl(clk->enable_reg); + reg &= ~(3 << clk->enable_shift); + __raw_writel(reg, clk->enable_reg); +} + +#define DEFINE_CLOCK(name, i, er, es, gr, sr) \ + static struct clk name = { \ + .id = i, \ + .enable_reg = CCM_BASE + er, \ + .enable_shift = es, \ + .get_rate = gr, \ + .set_rate = sr, \ + .enable = clk_cgr_enable, \ + .disable = clk_cgr_disable, \ + } + +DEFINE_CLOCK(asrc_clk, 0, CCM_CGR0, 0, NULL, NULL); +DEFINE_CLOCK(ata_clk, 0, CCM_CGR0, 2, get_rate_ipg, NULL); +/* DEFINE_CLOCK(audmux_clk, 0, CCM_CGR0, 4, NULL, NULL); */ +DEFINE_CLOCK(can1_clk, 0, CCM_CGR0, 6, get_rate_ipg, NULL); +DEFINE_CLOCK(can2_clk, 1, CCM_CGR0, 8, get_rate_ipg, NULL); +DEFINE_CLOCK(cspi1_clk, 0, CCM_CGR0, 10, get_rate_ipg, NULL); +DEFINE_CLOCK(cspi2_clk, 1, CCM_CGR0, 12, get_rate_ipg, NULL); +DEFINE_CLOCK(ect_clk, 0, CCM_CGR0, 14, get_rate_ipg, NULL); +DEFINE_CLOCK(edio_clk, 0, CCM_CGR0, 16, NULL, NULL); +DEFINE_CLOCK(emi_clk, 0, CCM_CGR0, 18, get_rate_ipg, NULL); +DEFINE_CLOCK(epit1_clk, 0, CCM_CGR0, 20, get_rate_ipg, NULL); +DEFINE_CLOCK(epit2_clk, 1, CCM_CGR0, 22, get_rate_ipg, NULL); +DEFINE_CLOCK(esai_clk, 0, CCM_CGR0, 24, NULL, NULL); +DEFINE_CLOCK(esdhc1_clk, 0, CCM_CGR0, 26, get_rate_sdhc, NULL); +DEFINE_CLOCK(esdhc2_clk, 1, CCM_CGR0, 28, get_rate_sdhc, NULL); +DEFINE_CLOCK(esdhc3_clk, 2, CCM_CGR0, 30, get_rate_sdhc, NULL); + +DEFINE_CLOCK(fec_clk, 0, CCM_CGR1, 0, get_rate_ipg, NULL); +DEFINE_CLOCK(gpio1_clk, 0, CCM_CGR1, 2, NULL, NULL); +DEFINE_CLOCK(gpio2_clk, 1, CCM_CGR1, 4, NULL, NULL); +DEFINE_CLOCK(gpio3_clk, 2, CCM_CGR1, 6, NULL, NULL); +DEFINE_CLOCK(gpt_clk, 0, CCM_CGR1, 8, get_rate_ipg, NULL); +DEFINE_CLOCK(i2c1_clk, 0, CCM_CGR1, 10, get_rate_ipg_per, NULL); +DEFINE_CLOCK(i2c2_clk, 1, CCM_CGR1, 12, get_rate_ipg_per, NULL); +DEFINE_CLOCK(i2c3_clk, 2, CCM_CGR1, 14, get_rate_ipg_per, NULL); +DEFINE_CLOCK(iomuxc_clk, 0, CCM_CGR1, 16, NULL, NULL); +DEFINE_CLOCK(ipu_clk, 0, CCM_CGR1, 18, get_rate_hsp, NULL); +DEFINE_CLOCK(kpp_clk, 0, CCM_CGR1, 20, get_rate_ipg, NULL); +DEFINE_CLOCK(mlb_clk, 0, CCM_CGR1, 22, get_rate_ahb, NULL); +DEFINE_CLOCK(mshc_clk, 0, CCM_CGR1, 24, get_rate_mshc, NULL); +DEFINE_CLOCK(owire_clk, 0, CCM_CGR1, 26, get_rate_ipg_per, NULL); +DEFINE_CLOCK(pwm_clk, 0, CCM_CGR1, 28, get_rate_ipg_per, NULL); +DEFINE_CLOCK(rngc_clk, 0, CCM_CGR1, 30, get_rate_ipg, NULL); + +DEFINE_CLOCK(rtc_clk, 0, CCM_CGR2, 0, get_rate_ipg, NULL); +DEFINE_CLOCK(rtic_clk, 0, CCM_CGR2, 2, get_rate_ahb, NULL); +DEFINE_CLOCK(scc_clk, 0, CCM_CGR2, 4, get_rate_ipg, NULL); +DEFINE_CLOCK(sdma_clk, 0, CCM_CGR2, 6, NULL, NULL); +DEFINE_CLOCK(spba_clk, 0, CCM_CGR2, 8, get_rate_ipg, NULL); +DEFINE_CLOCK(spdif_clk, 0, CCM_CGR2, 10, NULL, NULL); +DEFINE_CLOCK(ssi1_clk, 0, CCM_CGR2, 12, get_rate_ssi, NULL); +DEFINE_CLOCK(ssi2_clk, 1, CCM_CGR2, 14, get_rate_ssi, NULL); +DEFINE_CLOCK(uart1_clk, 0, CCM_CGR2, 16, get_rate_uart, NULL); +DEFINE_CLOCK(uart2_clk, 1, CCM_CGR2, 18, get_rate_uart, NULL); +DEFINE_CLOCK(uart3_clk, 2, CCM_CGR2, 20, get_rate_uart, NULL); +DEFINE_CLOCK(usbotg_clk, 0, CCM_CGR2, 22, get_rate_otg, NULL); +DEFINE_CLOCK(wdog_clk, 0, CCM_CGR2, 24, NULL, NULL); +DEFINE_CLOCK(max_clk, 0, CCM_CGR2, 26, NULL, NULL); +DEFINE_CLOCK(audmux_clk, 0, CCM_CGR2, 30, NULL, NULL); + +DEFINE_CLOCK(csi_clk, 0, CCM_CGR3, 0, get_rate_csi, NULL); +DEFINE_CLOCK(iim_clk, 0, CCM_CGR3, 2, NULL, NULL); +DEFINE_CLOCK(gpu2d_clk, 0, CCM_CGR3, 4, NULL, NULL); + +DEFINE_CLOCK(usbahb_clk, 0, 0, 0, get_rate_ahb, NULL); + +static int clk_dummy_enable(struct clk *clk) +{ + return 0; +} + +static void clk_dummy_disable(struct clk *clk) +{ +} + +static unsigned long get_rate_nfc(struct clk *clk) +{ + unsigned long div1; + + div1 = (__raw_readl(CCM_BASE + CCM_PDR4) >> 28) + 1; + + return get_rate_ahb(NULL) / div1; +} + +/* NAND Controller: It seems it can't be disabled */ +static struct clk nfc_clk = { + .id = 0, + .enable_reg = 0, + .enable_shift = 0, + .get_rate = get_rate_nfc, + .set_rate = NULL, /* set_rate_nfc, */ + .enable = clk_dummy_enable, + .disable = clk_dummy_disable +}; + +#define _REGISTER_CLOCK(d, n, c) \ + { \ + .dev_id = d, \ + .con_id = n, \ + .clk = &c, \ + }, + +static struct clk_lookup lookups[] = { + _REGISTER_CLOCK(NULL, "asrc", asrc_clk) + _REGISTER_CLOCK(NULL, "ata", ata_clk) + _REGISTER_CLOCK("flexcan.0", NULL, can1_clk) + _REGISTER_CLOCK("flexcan.1", NULL, can2_clk) + _REGISTER_CLOCK("imx35-cspi.0", NULL, cspi1_clk) + _REGISTER_CLOCK("imx35-cspi.1", NULL, cspi2_clk) + _REGISTER_CLOCK(NULL, "ect", ect_clk) + _REGISTER_CLOCK(NULL, "edio", edio_clk) + _REGISTER_CLOCK(NULL, "emi", emi_clk) + _REGISTER_CLOCK("imx-epit.0", NULL, epit1_clk) + _REGISTER_CLOCK("imx-epit.1", NULL, epit2_clk) + _REGISTER_CLOCK(NULL, "esai", esai_clk) + _REGISTER_CLOCK("sdhci-esdhc-imx.0", NULL, esdhc1_clk) + _REGISTER_CLOCK("sdhci-esdhc-imx.1", NULL, esdhc2_clk) + _REGISTER_CLOCK("sdhci-esdhc-imx.2", NULL, esdhc3_clk) + _REGISTER_CLOCK("fec.0", NULL, fec_clk) + _REGISTER_CLOCK(NULL, "gpio", gpio1_clk) + _REGISTER_CLOCK(NULL, "gpio", gpio2_clk) + _REGISTER_CLOCK(NULL, "gpio", gpio3_clk) + _REGISTER_CLOCK("gpt.0", NULL, gpt_clk) + _REGISTER_CLOCK("imx-i2c.0", NULL, i2c1_clk) + _REGISTER_CLOCK("imx-i2c.1", NULL, i2c2_clk) + _REGISTER_CLOCK("imx-i2c.2", NULL, i2c3_clk) + _REGISTER_CLOCK(NULL, "iomuxc", iomuxc_clk) + _REGISTER_CLOCK("ipu-core", NULL, ipu_clk) + _REGISTER_CLOCK("mx3_sdc_fb", NULL, ipu_clk) + _REGISTER_CLOCK(NULL, "kpp", kpp_clk) + _REGISTER_CLOCK(NULL, "mlb", mlb_clk) + _REGISTER_CLOCK(NULL, "mshc", mshc_clk) + _REGISTER_CLOCK("mxc_w1", NULL, owire_clk) + _REGISTER_CLOCK(NULL, "pwm", pwm_clk) + _REGISTER_CLOCK(NULL, "rngc", rngc_clk) + _REGISTER_CLOCK(NULL, "rtc", rtc_clk) + _REGISTER_CLOCK(NULL, "rtic", rtic_clk) + _REGISTER_CLOCK(NULL, "scc", scc_clk) + _REGISTER_CLOCK("imx-sdma", NULL, sdma_clk) + _REGISTER_CLOCK(NULL, "spba", spba_clk) + _REGISTER_CLOCK(NULL, "spdif", spdif_clk) + _REGISTER_CLOCK("imx-ssi.0", NULL, ssi1_clk) + _REGISTER_CLOCK("imx-ssi.1", NULL, ssi2_clk) + _REGISTER_CLOCK("imx-uart.0", NULL, uart1_clk) + _REGISTER_CLOCK("imx-uart.1", NULL, uart2_clk) + _REGISTER_CLOCK("imx-uart.2", NULL, uart3_clk) + _REGISTER_CLOCK("mxc-ehci.0", "usb", usbotg_clk) + _REGISTER_CLOCK("mxc-ehci.1", "usb", usbotg_clk) + _REGISTER_CLOCK("mxc-ehci.2", "usb", usbotg_clk) + _REGISTER_CLOCK("fsl-usb2-udc", "usb", usbotg_clk) + _REGISTER_CLOCK("fsl-usb2-udc", "usb_ahb", usbahb_clk) + _REGISTER_CLOCK("imx2-wdt.0", NULL, wdog_clk) + _REGISTER_CLOCK(NULL, "max", max_clk) + _REGISTER_CLOCK(NULL, "audmux", audmux_clk) + _REGISTER_CLOCK(NULL, "csi", csi_clk) + _REGISTER_CLOCK(NULL, "iim", iim_clk) + _REGISTER_CLOCK(NULL, "gpu2d", gpu2d_clk) + _REGISTER_CLOCK("mxc_nand.0", NULL, nfc_clk) +}; + +int __init mx35_clocks_init() +{ + unsigned int cgr2 = 3 << 26, cgr3 = 0; + +#if defined(CONFIG_DEBUG_LL) && !defined(CONFIG_DEBUG_ICEDCC) + cgr2 |= 3 << 16; +#endif + + clkdev_add_table(lookups, ARRAY_SIZE(lookups)); + + /* Turn off all clocks except the ones we need to survive, namely: + * EMI, GPIO1/2/3, GPT, IOMUX, MAX and eventually uart + */ + __raw_writel((3 << 18), CCM_BASE + CCM_CGR0); + __raw_writel((3 << 2) | (3 << 4) | (3 << 6) | (3 << 8) | (3 << 16), + CCM_BASE + CCM_CGR1); + + /* + * Check if we came up in internal boot mode. If yes, we need some + * extra clocks turned on, otherwise the MX35 boot ROM code will + * hang after a watchdog reset. + */ + if (!(__raw_readl(CCM_BASE + CCM_RCSR) & (3 << 10))) { + /* Additionally turn on UART1, SCC, and IIM clocks */ + cgr2 |= 3 << 16 | 3 << 4; + cgr3 |= 3 << 2; + } + + __raw_writel(cgr2, CCM_BASE + CCM_CGR2); + __raw_writel(cgr3, CCM_BASE + CCM_CGR3); + + clk_enable(&iim_clk); + mx35_read_cpu_rev(); + +#ifdef CONFIG_MXC_USE_EPIT + epit_timer_init(&epit1_clk, + MX35_IO_ADDRESS(MX35_EPIT1_BASE_ADDR), MX35_INT_EPIT1); +#else + mxc_timer_init(&gpt_clk, + MX35_IO_ADDRESS(MX35_GPT1_BASE_ADDR), MX35_INT_GPT); +#endif + + return 0; +} diff --git a/arch/arm/mach-imx/cpu-imx31.c b/arch/arm/mach-imx/cpu-imx31.c new file mode 100644 index 000000000000..a3780700a882 --- /dev/null +++ b/arch/arm/mach-imx/cpu-imx31.c @@ -0,0 +1,57 @@ +/* + * MX31 CPU type detection + * + * Copyright (c) 2009 Daniel Mack + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include + +unsigned int mx31_cpu_rev; +EXPORT_SYMBOL(mx31_cpu_rev); + +static struct { + u8 srev; + const char *name; + const char *v; + unsigned int rev; +} mx31_cpu_type[] __initdata = { + { .srev = 0x00, .name = "i.MX31(L)", .v = "1.0", .rev = IMX_CHIP_REVISION_1_0 }, + { .srev = 0x10, .name = "i.MX31", .v = "1.1", .rev = IMX_CHIP_REVISION_1_1 }, + { .srev = 0x11, .name = "i.MX31L", .v = "1.1", .rev = IMX_CHIP_REVISION_1_1 }, + { .srev = 0x12, .name = "i.MX31", .v = "1.15", .rev = IMX_CHIP_REVISION_1_1 }, + { .srev = 0x13, .name = "i.MX31L", .v = "1.15", .rev = IMX_CHIP_REVISION_1_1 }, + { .srev = 0x14, .name = "i.MX31", .v = "1.2", .rev = IMX_CHIP_REVISION_1_2 }, + { .srev = 0x15, .name = "i.MX31L", .v = "1.2", .rev = IMX_CHIP_REVISION_1_2 }, + { .srev = 0x28, .name = "i.MX31", .v = "2.0", .rev = IMX_CHIP_REVISION_2_0 }, + { .srev = 0x29, .name = "i.MX31L", .v = "2.0", .rev = IMX_CHIP_REVISION_2_0 }, +}; + +void __init mx31_read_cpu_rev(void) +{ + u32 i, srev; + + /* read SREV register from IIM module */ + srev = __raw_readl(MX31_IO_ADDRESS(MX31_IIM_BASE_ADDR + MXC_IIMSREV)); + + for (i = 0; i < ARRAY_SIZE(mx31_cpu_type); i++) + if (srev == mx31_cpu_type[i].srev) { + printk(KERN_INFO + "CPU identified as %s, silicon rev %s\n", + mx31_cpu_type[i].name, mx31_cpu_type[i].v); + + mx31_cpu_rev = mx31_cpu_type[i].rev; + return; + } + + mx31_cpu_rev = IMX_CHIP_REVISION_UNKNOWN; + + printk(KERN_WARNING "Unknown CPU identifier. srev = %02x\n", srev); +} diff --git a/arch/arm/mach-imx/cpu-imx35.c b/arch/arm/mach-imx/cpu-imx35.c new file mode 100644 index 000000000000..6637cd819ecb --- /dev/null +++ b/arch/arm/mach-imx/cpu-imx35.c @@ -0,0 +1,44 @@ +/* + * MX35 CPU type detection + * + * Copyright (c) 2009 Daniel Mack + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ +#include +#include +#include +#include + +unsigned int mx35_cpu_rev; +EXPORT_SYMBOL(mx35_cpu_rev); + +void __init mx35_read_cpu_rev(void) +{ + u32 rev; + char *srev; + + rev = __raw_readl(MX35_IO_ADDRESS(MX35_IIM_BASE_ADDR + MXC_IIMSREV)); + switch (rev) { + case 0x00: + mx35_cpu_rev = IMX_CHIP_REVISION_1_0; + srev = "1.0"; + break; + case 0x10: + mx35_cpu_rev = IMX_CHIP_REVISION_2_0; + srev = "2.0"; + break; + case 0x11: + mx35_cpu_rev = IMX_CHIP_REVISION_2_1; + srev = "2.1"; + break; + default: + mx35_cpu_rev = IMX_CHIP_REVISION_UNKNOWN; + srev = "unknown"; + } + + printk(KERN_INFO "CPU identified as i.MX35, silicon rev %s\n", srev); +} diff --git a/arch/arm/mach-mx3/crm_regs.h b/arch/arm/mach-imx/crmregs-imx31.h similarity index 100% rename from arch/arm/mach-mx3/crm_regs.h rename to arch/arm/mach-imx/crmregs-imx31.h diff --git a/arch/arm/mach-imx/devices-imx1.h b/arch/arm/mach-imx/devices-imx1.h index da593657ff3f..3aad1e70de96 100644 --- a/arch/arm/mach-imx/devices-imx1.h +++ b/arch/arm/mach-imx/devices-imx1.h @@ -9,21 +9,21 @@ #include #include -extern const struct imx_imx_fb_data imx1_imx_fb_data __initconst; +extern const struct imx_imx_fb_data imx1_imx_fb_data; #define imx1_add_imx_fb(pdata) \ imx_add_imx_fb(&imx1_imx_fb_data, pdata) -extern const struct imx_imx_i2c_data imx1_imx_i2c_data __initconst; +extern const struct imx_imx_i2c_data imx1_imx_i2c_data; #define imx1_add_imx_i2c(pdata) \ imx_add_imx_i2c(&imx1_imx_i2c_data, pdata) -extern const struct imx_imx_uart_3irq_data imx1_imx_uart_data[] __initconst; +extern const struct imx_imx_uart_3irq_data imx1_imx_uart_data[]; #define imx1_add_imx_uart(id, pdata) \ imx_add_imx_uart_3irq(&imx1_imx_uart_data[id], pdata) #define imx1_add_imx_uart0(pdata) imx1_add_imx_uart(0, pdata) #define imx1_add_imx_uart1(pdata) imx1_add_imx_uart(1, pdata) -extern const struct imx_spi_imx_data imx1_cspi_data[] __initconst; +extern const struct imx_spi_imx_data imx1_cspi_data[]; #define imx1_add_cspi(id, pdata) \ imx_add_spi_imx(&imx1_cspi_data[id], pdata) diff --git a/arch/arm/mach-imx/devices-imx21.h b/arch/arm/mach-imx/devices-imx21.h index 16744d2d9b81..2628e0c474dc 100644 --- a/arch/arm/mach-imx/devices-imx21.h +++ b/arch/arm/mach-imx/devices-imx21.h @@ -9,31 +9,31 @@ #include #include -extern const struct imx_imx21_hcd_data imx21_imx21_hcd_data __initconst; +extern const struct imx_imx21_hcd_data imx21_imx21_hcd_data; #define imx21_add_imx21_hcd(pdata) \ imx_add_imx21_hcd(&imx21_imx21_hcd_data, pdata) -extern const struct imx_imx2_wdt_data imx21_imx2_wdt_data __initconst; +extern const struct imx_imx2_wdt_data imx21_imx2_wdt_data; #define imx21_add_imx2_wdt(pdata) \ imx_add_imx2_wdt(&imx21_imx2_wdt_data) -extern const struct imx_imx_fb_data imx21_imx_fb_data __initconst; +extern const struct imx_imx_fb_data imx21_imx_fb_data; #define imx21_add_imx_fb(pdata) \ imx_add_imx_fb(&imx21_imx_fb_data, pdata) -extern const struct imx_imx_i2c_data imx21_imx_i2c_data __initconst; +extern const struct imx_imx_i2c_data imx21_imx_i2c_data; #define imx21_add_imx_i2c(pdata) \ imx_add_imx_i2c(&imx21_imx_i2c_data, pdata) -extern const struct imx_imx_keypad_data imx21_imx_keypad_data __initconst; +extern const struct imx_imx_keypad_data imx21_imx_keypad_data; #define imx21_add_imx_keypad(pdata) \ imx_add_imx_keypad(&imx21_imx_keypad_data, pdata) -extern const struct imx_imx_ssi_data imx21_imx_ssi_data[] __initconst; +extern const struct imx_imx_ssi_data imx21_imx_ssi_data[]; #define imx21_add_imx_ssi(id, pdata) \ imx_add_imx_ssi(&imx21_imx_ssi_data[id], pdata) -extern const struct imx_imx_uart_1irq_data imx21_imx_uart_data[] __initconst; +extern const struct imx_imx_uart_1irq_data imx21_imx_uart_data[]; #define imx21_add_imx_uart(id, pdata) \ imx_add_imx_uart_1irq(&imx21_imx_uart_data[id], pdata) #define imx21_add_imx_uart0(pdata) imx21_add_imx_uart(0, pdata) @@ -41,19 +41,19 @@ extern const struct imx_imx_uart_1irq_data imx21_imx_uart_data[] __initconst; #define imx21_add_imx_uart2(pdata) imx21_add_imx_uart(2, pdata) #define imx21_add_imx_uart3(pdata) imx21_add_imx_uart(3, pdata) -extern const struct imx_mxc_mmc_data imx21_mxc_mmc_data[] __initconst; +extern const struct imx_mxc_mmc_data imx21_mxc_mmc_data[]; #define imx21_add_mxc_mmc(id, pdata) \ imx_add_mxc_mmc(&imx21_mxc_mmc_data[id], pdata) -extern const struct imx_mxc_nand_data imx21_mxc_nand_data __initconst; +extern const struct imx_mxc_nand_data imx21_mxc_nand_data; #define imx21_add_mxc_nand(pdata) \ imx_add_mxc_nand(&imx21_mxc_nand_data, pdata) -extern const struct imx_mxc_w1_data imx21_mxc_w1_data __initconst; +extern const struct imx_mxc_w1_data imx21_mxc_w1_data; #define imx21_add_mxc_w1(pdata) \ imx_add_mxc_w1(&imx21_mxc_w1_data) -extern const struct imx_spi_imx_data imx21_cspi_data[] __initconst; +extern const struct imx_spi_imx_data imx21_cspi_data[]; #define imx21_add_cspi(id, pdata) \ imx_add_spi_imx(&imx21_cspi_data[id], pdata) #define imx21_add_spi_imx0(pdata) imx21_add_cspi(0, pdata) diff --git a/arch/arm/mach-imx/devices-imx25.h b/arch/arm/mach-imx/devices-imx25.h index b591d72f6037..efa0761c508d 100644 --- a/arch/arm/mach-imx/devices-imx25.h +++ b/arch/arm/mach-imx/devices-imx25.h @@ -9,48 +9,48 @@ #include #include -extern const struct imx_fec_data imx25_fec_data __initconst; +extern const struct imx_fec_data imx25_fec_data; #define imx25_add_fec(pdata) \ imx_add_fec(&imx25_fec_data, pdata) -extern const struct imx_flexcan_data imx25_flexcan_data[] __initconst; +extern const struct imx_flexcan_data imx25_flexcan_data[]; #define imx25_add_flexcan(id, pdata) \ imx_add_flexcan(&imx25_flexcan_data[id], pdata) #define imx25_add_flexcan0(pdata) imx25_add_flexcan(0, pdata) #define imx25_add_flexcan1(pdata) imx25_add_flexcan(1, pdata) -extern const struct imx_fsl_usb2_udc_data imx25_fsl_usb2_udc_data __initconst; +extern const struct imx_fsl_usb2_udc_data imx25_fsl_usb2_udc_data; #define imx25_add_fsl_usb2_udc(pdata) \ imx_add_fsl_usb2_udc(&imx25_fsl_usb2_udc_data, pdata) -extern struct imx_imxdi_rtc_data imx25_imxdi_rtc_data __initconst; +extern struct imx_imxdi_rtc_data imx25_imxdi_rtc_data; #define imx25_add_imxdi_rtc(pdata) \ imx_add_imxdi_rtc(&imx25_imxdi_rtc_data) -extern const struct imx_imx2_wdt_data imx25_imx2_wdt_data __initconst; +extern const struct imx_imx2_wdt_data imx25_imx2_wdt_data; #define imx25_add_imx2_wdt(pdata) \ imx_add_imx2_wdt(&imx25_imx2_wdt_data) -extern const struct imx_imx_fb_data imx25_imx_fb_data __initconst; +extern const struct imx_imx_fb_data imx25_imx_fb_data; #define imx25_add_imx_fb(pdata) \ imx_add_imx_fb(&imx25_imx_fb_data, pdata) -extern const struct imx_imx_i2c_data imx25_imx_i2c_data[] __initconst; +extern const struct imx_imx_i2c_data imx25_imx_i2c_data[]; #define imx25_add_imx_i2c(id, pdata) \ imx_add_imx_i2c(&imx25_imx_i2c_data[id], pdata) #define imx25_add_imx_i2c0(pdata) imx25_add_imx_i2c(0, pdata) #define imx25_add_imx_i2c1(pdata) imx25_add_imx_i2c(1, pdata) #define imx25_add_imx_i2c2(pdata) imx25_add_imx_i2c(2, pdata) -extern const struct imx_imx_keypad_data imx25_imx_keypad_data __initconst; +extern const struct imx_imx_keypad_data imx25_imx_keypad_data; #define imx25_add_imx_keypad(pdata) \ imx_add_imx_keypad(&imx25_imx_keypad_data, pdata) -extern const struct imx_imx_ssi_data imx25_imx_ssi_data[] __initconst; +extern const struct imx_imx_ssi_data imx25_imx_ssi_data[]; #define imx25_add_imx_ssi(id, pdata) \ imx_add_imx_ssi(&imx25_imx_ssi_data[id], pdata) -extern const struct imx_imx_uart_1irq_data imx25_imx_uart_data[] __initconst; +extern const struct imx_imx_uart_1irq_data imx25_imx_uart_data[]; #define imx25_add_imx_uart(id, pdata) \ imx_add_imx_uart_1irq(&imx25_imx_uart_data[id], pdata) #define imx25_add_imx_uart0(pdata) imx25_add_imx_uart(0, pdata) @@ -59,33 +59,32 @@ extern const struct imx_imx_uart_1irq_data imx25_imx_uart_data[] __initconst; #define imx25_add_imx_uart3(pdata) imx25_add_imx_uart(3, pdata) #define imx25_add_imx_uart4(pdata) imx25_add_imx_uart(4, pdata) -extern const struct imx_mx2_camera_data imx25_mx2_camera_data __initconst; +extern const struct imx_mx2_camera_data imx25_mx2_camera_data; #define imx25_add_mx2_camera(pdata) \ imx_add_mx2_camera(&imx25_mx2_camera_data, pdata) -extern const struct imx_mxc_ehci_data imx25_mxc_ehci_otg_data __initconst; +extern const struct imx_mxc_ehci_data imx25_mxc_ehci_otg_data; #define imx25_add_mxc_ehci_otg(pdata) \ imx_add_mxc_ehci(&imx25_mxc_ehci_otg_data, pdata) -extern const struct imx_mxc_ehci_data imx25_mxc_ehci_hs_data __initconst; +extern const struct imx_mxc_ehci_data imx25_mxc_ehci_hs_data; #define imx25_add_mxc_ehci_hs(pdata) \ imx_add_mxc_ehci(&imx25_mxc_ehci_hs_data, pdata) -extern const struct imx_mxc_nand_data imx25_mxc_nand_data __initconst; +extern const struct imx_mxc_nand_data imx25_mxc_nand_data; #define imx25_add_mxc_nand(pdata) \ imx_add_mxc_nand(&imx25_mxc_nand_data, pdata) -extern const struct imx_sdhci_esdhc_imx_data -imx25_sdhci_esdhc_imx_data[] __initconst; +extern const struct imx_sdhci_esdhc_imx_data imx25_sdhci_esdhc_imx_data[]; #define imx25_add_sdhci_esdhc_imx(id, pdata) \ imx_add_sdhci_esdhc_imx(&imx25_sdhci_esdhc_imx_data[id], pdata) -extern const struct imx_spi_imx_data imx25_cspi_data[] __initconst; +extern const struct imx_spi_imx_data imx25_cspi_data[]; #define imx25_add_spi_imx(id, pdata) \ imx_add_spi_imx(&imx25_cspi_data[id], pdata) #define imx25_add_spi_imx0(pdata) imx25_add_spi_imx(0, pdata) #define imx25_add_spi_imx1(pdata) imx25_add_spi_imx(1, pdata) #define imx25_add_spi_imx2(pdata) imx25_add_spi_imx(2, pdata) -extern struct imx_mxc_pwm_data imx25_mxc_pwm_data[] __initconst; +extern struct imx_mxc_pwm_data imx25_mxc_pwm_data[]; #define imx25_add_mxc_pwm(id) \ imx_add_mxc_pwm(&imx25_mxc_pwm_data[id]) diff --git a/arch/arm/mach-imx/devices-imx27.h b/arch/arm/mach-imx/devices-imx27.h index f1272d4b5a33..7f97a3cdd41d 100644 --- a/arch/arm/mach-imx/devices-imx27.h +++ b/arch/arm/mach-imx/devices-imx27.h @@ -9,35 +9,35 @@ #include #include -extern const struct imx_fec_data imx27_fec_data __initconst; +extern const struct imx_fec_data imx27_fec_data; #define imx27_add_fec(pdata) \ imx_add_fec(&imx27_fec_data, pdata) -extern const struct imx_fsl_usb2_udc_data imx27_fsl_usb2_udc_data __initconst; +extern const struct imx_fsl_usb2_udc_data imx27_fsl_usb2_udc_data; #define imx27_add_fsl_usb2_udc(pdata) \ imx_add_fsl_usb2_udc(&imx27_fsl_usb2_udc_data, pdata) -extern const struct imx_imx2_wdt_data imx27_imx2_wdt_data __initconst; +extern const struct imx_imx2_wdt_data imx27_imx2_wdt_data; #define imx27_add_imx2_wdt(pdata) \ imx_add_imx2_wdt(&imx27_imx2_wdt_data) -extern const struct imx_imx_fb_data imx27_imx_fb_data __initconst; +extern const struct imx_imx_fb_data imx27_imx_fb_data; #define imx27_add_imx_fb(pdata) \ imx_add_imx_fb(&imx27_imx_fb_data, pdata) -extern const struct imx_imx_i2c_data imx27_imx_i2c_data[] __initconst; +extern const struct imx_imx_i2c_data imx27_imx_i2c_data[]; #define imx27_add_imx_i2c(id, pdata) \ imx_add_imx_i2c(&imx27_imx_i2c_data[id], pdata) -extern const struct imx_imx_keypad_data imx27_imx_keypad_data __initconst; +extern const struct imx_imx_keypad_data imx27_imx_keypad_data; #define imx27_add_imx_keypad(pdata) \ imx_add_imx_keypad(&imx27_imx_keypad_data, pdata) -extern const struct imx_imx_ssi_data imx27_imx_ssi_data[] __initconst; +extern const struct imx_imx_ssi_data imx27_imx_ssi_data[]; #define imx27_add_imx_ssi(id, pdata) \ imx_add_imx_ssi(&imx27_imx_ssi_data[id], pdata) -extern const struct imx_imx_uart_1irq_data imx27_imx_uart_data[] __initconst; +extern const struct imx_imx_uart_1irq_data imx27_imx_uart_data[]; #define imx27_add_imx_uart(id, pdata) \ imx_add_imx_uart_1irq(&imx27_imx_uart_data[id], pdata) #define imx27_add_imx_uart0(pdata) imx27_add_imx_uart(0, pdata) @@ -47,30 +47,30 @@ extern const struct imx_imx_uart_1irq_data imx27_imx_uart_data[] __initconst; #define imx27_add_imx_uart4(pdata) imx27_add_imx_uart(4, pdata) #define imx27_add_imx_uart5(pdata) imx27_add_imx_uart(5, pdata) -extern const struct imx_mx2_camera_data imx27_mx2_camera_data __initconst; +extern const struct imx_mx2_camera_data imx27_mx2_camera_data; #define imx27_add_mx2_camera(pdata) \ imx_add_mx2_camera(&imx27_mx2_camera_data, pdata) -extern const struct imx_mxc_ehci_data imx27_mxc_ehci_otg_data __initconst; +extern const struct imx_mxc_ehci_data imx27_mxc_ehci_otg_data; #define imx27_add_mxc_ehci_otg(pdata) \ imx_add_mxc_ehci(&imx27_mxc_ehci_otg_data, pdata) -extern const struct imx_mxc_ehci_data imx27_mxc_ehci_hs_data[] __initconst; +extern const struct imx_mxc_ehci_data imx27_mxc_ehci_hs_data[]; #define imx27_add_mxc_ehci_hs(id, pdata) \ imx_add_mxc_ehci(&imx27_mxc_ehci_hs_data[id - 1], pdata) -extern const struct imx_mxc_mmc_data imx27_mxc_mmc_data[] __initconst; +extern const struct imx_mxc_mmc_data imx27_mxc_mmc_data[]; #define imx27_add_mxc_mmc(id, pdata) \ imx_add_mxc_mmc(&imx27_mxc_mmc_data[id], pdata) -extern const struct imx_mxc_nand_data imx27_mxc_nand_data __initconst; +extern const struct imx_mxc_nand_data imx27_mxc_nand_data; #define imx27_add_mxc_nand(pdata) \ imx_add_mxc_nand(&imx27_mxc_nand_data, pdata) -extern const struct imx_mxc_w1_data imx27_mxc_w1_data __initconst; +extern const struct imx_mxc_w1_data imx27_mxc_w1_data; #define imx27_add_mxc_w1(pdata) \ imx_add_mxc_w1(&imx27_mxc_w1_data) -extern const struct imx_spi_imx_data imx27_cspi_data[] __initconst; +extern const struct imx_spi_imx_data imx27_cspi_data[]; #define imx27_add_cspi(id, pdata) \ imx_add_spi_imx(&imx27_cspi_data[id], pdata) #define imx27_add_spi_imx0(pdata) imx27_add_cspi(0, pdata) diff --git a/arch/arm/mach-imx/devices-imx31.h b/arch/arm/mach-imx/devices-imx31.h new file mode 100644 index 000000000000..dbe940d9c53a --- /dev/null +++ b/arch/arm/mach-imx/devices-imx31.h @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2010 Pengutronix + * Uwe Kleine-Koenig + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License version 2 as published by the + * Free Software Foundation. + */ +#include +#include + +extern const struct imx_fsl_usb2_udc_data imx31_fsl_usb2_udc_data; +#define imx31_add_fsl_usb2_udc(pdata) \ + imx_add_fsl_usb2_udc(&imx31_fsl_usb2_udc_data, pdata) + +extern const struct imx_imx2_wdt_data imx31_imx2_wdt_data; +#define imx31_add_imx2_wdt(pdata) \ + imx_add_imx2_wdt(&imx31_imx2_wdt_data) + +extern const struct imx_imx_i2c_data imx31_imx_i2c_data[]; +#define imx31_add_imx_i2c(id, pdata) \ + imx_add_imx_i2c(&imx31_imx_i2c_data[id], pdata) +#define imx31_add_imx_i2c0(pdata) imx31_add_imx_i2c(0, pdata) +#define imx31_add_imx_i2c1(pdata) imx31_add_imx_i2c(1, pdata) +#define imx31_add_imx_i2c2(pdata) imx31_add_imx_i2c(2, pdata) + +extern const struct imx_imx_keypad_data imx31_imx_keypad_data; +#define imx31_add_imx_keypad(pdata) \ + imx_add_imx_keypad(&imx31_imx_keypad_data, pdata) + +extern const struct imx_imx_ssi_data imx31_imx_ssi_data[]; +#define imx31_add_imx_ssi(id, pdata) \ + imx_add_imx_ssi(&imx31_imx_ssi_data[id], pdata) + +extern const struct imx_imx_uart_1irq_data imx31_imx_uart_data[]; +#define imx31_add_imx_uart(id, pdata) \ + imx_add_imx_uart_1irq(&imx31_imx_uart_data[id], pdata) +#define imx31_add_imx_uart0(pdata) imx31_add_imx_uart(0, pdata) +#define imx31_add_imx_uart1(pdata) imx31_add_imx_uart(1, pdata) +#define imx31_add_imx_uart2(pdata) imx31_add_imx_uart(2, pdata) +#define imx31_add_imx_uart3(pdata) imx31_add_imx_uart(3, pdata) +#define imx31_add_imx_uart4(pdata) imx31_add_imx_uart(4, pdata) + +extern const struct imx_ipu_core_data imx31_ipu_core_data; +#define imx31_add_ipu_core(pdata) \ + imx_add_ipu_core(&imx31_ipu_core_data, pdata) +#define imx31_alloc_mx3_camera(pdata) \ + imx_alloc_mx3_camera(&imx31_ipu_core_data, pdata) +#define imx31_add_mx3_sdc_fb(pdata) \ + imx_add_mx3_sdc_fb(&imx31_ipu_core_data, pdata) + +extern const struct imx_mxc_ehci_data imx31_mxc_ehci_otg_data; +#define imx31_add_mxc_ehci_otg(pdata) \ + imx_add_mxc_ehci(&imx31_mxc_ehci_otg_data, pdata) +extern const struct imx_mxc_ehci_data imx31_mxc_ehci_hs_data[]; +#define imx31_add_mxc_ehci_hs(id, pdata) \ + imx_add_mxc_ehci(&imx31_mxc_ehci_hs_data[id - 1], pdata) + +extern const struct imx_mxc_mmc_data imx31_mxc_mmc_data[]; +#define imx31_add_mxc_mmc(id, pdata) \ + imx_add_mxc_mmc(&imx31_mxc_mmc_data[id], pdata) + +extern const struct imx_mxc_nand_data imx31_mxc_nand_data; +#define imx31_add_mxc_nand(pdata) \ + imx_add_mxc_nand(&imx31_mxc_nand_data, pdata) + +extern const struct imx_mxc_rtc_data imx31_mxc_rtc_data; +#define imx31_add_mxc_rtc(pdata) \ + imx_add_mxc_rtc(&imx31_mxc_rtc_data) + +extern const struct imx_mxc_w1_data imx31_mxc_w1_data; +#define imx31_add_mxc_w1(pdata) \ + imx_add_mxc_w1(&imx31_mxc_w1_data) + +extern const struct imx_spi_imx_data imx31_cspi_data[]; +#define imx31_add_cspi(id, pdata) \ + imx_add_spi_imx(&imx31_cspi_data[id], pdata) +#define imx31_add_spi_imx0(pdata) imx31_add_cspi(0, pdata) +#define imx31_add_spi_imx1(pdata) imx31_add_cspi(1, pdata) +#define imx31_add_spi_imx2(pdata) imx31_add_cspi(2, pdata) diff --git a/arch/arm/mach-imx/devices-imx35.h b/arch/arm/mach-imx/devices-imx35.h new file mode 100644 index 000000000000..234cbd3c18af --- /dev/null +++ b/arch/arm/mach-imx/devices-imx35.h @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2010 Pengutronix + * Uwe Kleine-Koenig + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License version 2 as published by the + * Free Software Foundation. + */ +#include +#include + +extern const struct imx_fec_data imx35_fec_data; +#define imx35_add_fec(pdata) \ + imx_add_fec(&imx35_fec_data, pdata) + +extern const struct imx_fsl_usb2_udc_data imx35_fsl_usb2_udc_data; +#define imx35_add_fsl_usb2_udc(pdata) \ + imx_add_fsl_usb2_udc(&imx35_fsl_usb2_udc_data, pdata) + +extern const struct imx_flexcan_data imx35_flexcan_data[]; +#define imx35_add_flexcan(id, pdata) \ + imx_add_flexcan(&imx35_flexcan_data[id], pdata) +#define imx35_add_flexcan0(pdata) imx35_add_flexcan(0, pdata) +#define imx35_add_flexcan1(pdata) imx35_add_flexcan(1, pdata) + +extern const struct imx_imx2_wdt_data imx35_imx2_wdt_data; +#define imx35_add_imx2_wdt(pdata) \ + imx_add_imx2_wdt(&imx35_imx2_wdt_data) + +extern const struct imx_imx_i2c_data imx35_imx_i2c_data[]; +#define imx35_add_imx_i2c(id, pdata) \ + imx_add_imx_i2c(&imx35_imx_i2c_data[id], pdata) +#define imx35_add_imx_i2c0(pdata) imx35_add_imx_i2c(0, pdata) +#define imx35_add_imx_i2c1(pdata) imx35_add_imx_i2c(1, pdata) +#define imx35_add_imx_i2c2(pdata) imx35_add_imx_i2c(2, pdata) + +extern const struct imx_imx_keypad_data imx35_imx_keypad_data; +#define imx35_add_imx_keypad(pdata) \ + imx_add_imx_keypad(&imx35_imx_keypad_data, pdata) + +extern const struct imx_imx_ssi_data imx35_imx_ssi_data[]; +#define imx35_add_imx_ssi(id, pdata) \ + imx_add_imx_ssi(&imx35_imx_ssi_data[id], pdata) + +extern const struct imx_imx_uart_1irq_data imx35_imx_uart_data[]; +#define imx35_add_imx_uart(id, pdata) \ + imx_add_imx_uart_1irq(&imx35_imx_uart_data[id], pdata) +#define imx35_add_imx_uart0(pdata) imx35_add_imx_uart(0, pdata) +#define imx35_add_imx_uart1(pdata) imx35_add_imx_uart(1, pdata) +#define imx35_add_imx_uart2(pdata) imx35_add_imx_uart(2, pdata) + +extern const struct imx_ipu_core_data imx35_ipu_core_data; +#define imx35_add_ipu_core(pdata) \ + imx_add_ipu_core(&imx35_ipu_core_data, pdata) +#define imx35_alloc_mx3_camera(pdata) \ + imx_alloc_mx3_camera(&imx35_ipu_core_data, pdata) +#define imx35_add_mx3_sdc_fb(pdata) \ + imx_add_mx3_sdc_fb(&imx35_ipu_core_data, pdata) + +extern const struct imx_mxc_ehci_data imx35_mxc_ehci_otg_data; +#define imx35_add_mxc_ehci_otg(pdata) \ + imx_add_mxc_ehci(&imx35_mxc_ehci_otg_data, pdata) +extern const struct imx_mxc_ehci_data imx35_mxc_ehci_hs_data; +#define imx35_add_mxc_ehci_hs(pdata) \ + imx_add_mxc_ehci(&imx35_mxc_ehci_hs_data, pdata) + +extern const struct imx_mxc_nand_data imx35_mxc_nand_data; +#define imx35_add_mxc_nand(pdata) \ + imx_add_mxc_nand(&imx35_mxc_nand_data, pdata) + +extern const struct imx_mxc_w1_data imx35_mxc_w1_data; +#define imx35_add_mxc_w1(pdata) \ + imx_add_mxc_w1(&imx35_mxc_w1_data) + +extern const struct imx_sdhci_esdhc_imx_data imx35_sdhci_esdhc_imx_data[]; +#define imx35_add_sdhci_esdhc_imx(id, pdata) \ + imx_add_sdhci_esdhc_imx(&imx35_sdhci_esdhc_imx_data[id], pdata) + +extern const struct imx_spi_imx_data imx35_cspi_data[]; +#define imx35_add_cspi(id, pdata) \ + imx_add_spi_imx(&imx35_cspi_data[id], pdata) +#define imx35_add_spi_imx0(pdata) imx35_add_cspi(0, pdata) +#define imx35_add_spi_imx1(pdata) imx35_add_cspi(1, pdata) diff --git a/arch/arm/mach-imx/ehci-imx31.c b/arch/arm/mach-imx/ehci-imx31.c new file mode 100644 index 000000000000..faad0f15ac7f --- /dev/null +++ b/arch/arm/mach-imx/ehci-imx31.c @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2009 Daniel Mack + * Copyright (C) 2010 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#include +#include + +#include +#include + +#define USBCTRL_OTGBASE_OFFSET 0x600 + +#define MX31_OTG_SIC_SHIFT 29 +#define MX31_OTG_SIC_MASK (0x3 << MX31_OTG_SIC_SHIFT) +#define MX31_OTG_PM_BIT (1 << 24) + +#define MX31_H2_SIC_SHIFT 21 +#define MX31_H2_SIC_MASK (0x3 << MX31_H2_SIC_SHIFT) +#define MX31_H2_PM_BIT (1 << 16) +#define MX31_H2_DT_BIT (1 << 5) + +#define MX31_H1_SIC_SHIFT 13 +#define MX31_H1_SIC_MASK (0x3 << MX31_H1_SIC_SHIFT) +#define MX31_H1_PM_BIT (1 << 8) +#define MX31_H1_DT_BIT (1 << 4) + +int mx31_initialize_usb_hw(int port, unsigned int flags) +{ + unsigned int v; + + v = readl(MX31_IO_ADDRESS(MX31_USB_BASE_ADDR + USBCTRL_OTGBASE_OFFSET)); + + switch (port) { + case 0: /* OTG port */ + v &= ~(MX31_OTG_SIC_MASK | MX31_OTG_PM_BIT); + v |= (flags & MXC_EHCI_INTERFACE_MASK) << MX31_OTG_SIC_SHIFT; + + if (!(flags & MXC_EHCI_POWER_PINS_ENABLED)) + v |= MX31_OTG_PM_BIT; + + break; + case 1: /* H1 port */ + v &= ~(MX31_H1_SIC_MASK | MX31_H1_PM_BIT | MX31_H1_DT_BIT); + v |= (flags & MXC_EHCI_INTERFACE_MASK) << MX31_H1_SIC_SHIFT; + + if (!(flags & MXC_EHCI_POWER_PINS_ENABLED)) + v |= MX31_H1_PM_BIT; + + if (!(flags & MXC_EHCI_TTL_ENABLED)) + v |= MX31_H1_DT_BIT; + + break; + case 2: /* H2 port */ + v &= ~(MX31_H2_SIC_MASK | MX31_H2_PM_BIT | MX31_H2_DT_BIT); + v |= (flags & MXC_EHCI_INTERFACE_MASK) << MX31_H2_SIC_SHIFT; + + if (!(flags & MXC_EHCI_POWER_PINS_ENABLED)) + v |= MX31_H2_PM_BIT; + + if (!(flags & MXC_EHCI_TTL_ENABLED)) + v |= MX31_H2_DT_BIT; + + break; + default: + return -EINVAL; + } + + writel(v, MX31_IO_ADDRESS(MX31_USB_BASE_ADDR + USBCTRL_OTGBASE_OFFSET)); + + return 0; +} diff --git a/arch/arm/mach-imx/ehci-imx35.c b/arch/arm/mach-imx/ehci-imx35.c new file mode 100644 index 000000000000..001ec3971f5d --- /dev/null +++ b/arch/arm/mach-imx/ehci-imx35.c @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2009 Daniel Mack + * Copyright (C) 2010 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + */ + +#include +#include + +#include +#include + +#define USBCTRL_OTGBASE_OFFSET 0x600 + +#define MX35_OTG_SIC_SHIFT 29 +#define MX35_OTG_SIC_MASK (0x3 << MX35_OTG_SIC_SHIFT) +#define MX35_OTG_PM_BIT (1 << 24) + +#define MX35_H1_SIC_SHIFT 21 +#define MX35_H1_SIC_MASK (0x3 << MX35_H1_SIC_SHIFT) +#define MX35_H1_PM_BIT (1 << 8) +#define MX35_H1_IPPUE_UP_BIT (1 << 7) +#define MX35_H1_IPPUE_DOWN_BIT (1 << 6) +#define MX35_H1_TLL_BIT (1 << 5) +#define MX35_H1_USBTE_BIT (1 << 4) + +int mx35_initialize_usb_hw(int port, unsigned int flags) +{ + unsigned int v; + + v = readl(MX35_IO_ADDRESS(MX35_USB_BASE_ADDR + USBCTRL_OTGBASE_OFFSET)); + + switch (port) { + case 0: /* OTG port */ + v &= ~(MX35_OTG_SIC_MASK | MX35_OTG_PM_BIT); + v |= (flags & MXC_EHCI_INTERFACE_MASK) << MX35_OTG_SIC_SHIFT; + + if (!(flags & MXC_EHCI_POWER_PINS_ENABLED)) + v |= MX35_OTG_PM_BIT; + + break; + case 1: /* H1 port */ + v &= ~(MX35_H1_SIC_MASK | MX35_H1_PM_BIT | MX35_H1_TLL_BIT | + MX35_H1_USBTE_BIT | MX35_H1_IPPUE_DOWN_BIT | MX35_H1_IPPUE_UP_BIT); + v |= (flags & MXC_EHCI_INTERFACE_MASK) << MX35_H1_SIC_SHIFT; + + if (!(flags & MXC_EHCI_POWER_PINS_ENABLED)) + v |= MX35_H1_PM_BIT; + + if (!(flags & MXC_EHCI_TTL_ENABLED)) + v |= MX35_H1_TLL_BIT; + + if (flags & MXC_EHCI_INTERNAL_PHY) + v |= MX35_H1_USBTE_BIT; + + if (flags & MXC_EHCI_IPPUE_DOWN) + v |= MX35_H1_IPPUE_DOWN_BIT; + + if (flags & MXC_EHCI_IPPUE_UP) + v |= MX35_H1_IPPUE_UP_BIT; + + break; + default: + return -EINVAL; + } + + writel(v, MX35_IO_ADDRESS(MX35_USB_BASE_ADDR + USBCTRL_OTGBASE_OFFSET)); + + return 0; +} diff --git a/arch/arm/mach-imx/eukrea_mbimx27-baseboard.c b/arch/arm/mach-imx/eukrea_mbimx27-baseboard.c index fa5288018ba7..5911281da5f5 100644 --- a/arch/arm/mach-imx/eukrea_mbimx27-baseboard.c +++ b/arch/arm/mach-imx/eukrea_mbimx27-baseboard.c @@ -32,7 +32,6 @@ #include #include #include -#include #include #include "devices-imx27.h" diff --git a/arch/arm/mach-imx/eukrea_mbimxsd25-baseboard.c b/arch/arm/mach-imx/eukrea_mbimxsd25-baseboard.c index 6269053505f7..f9ef04acdab1 100644 --- a/arch/arm/mach-imx/eukrea_mbimxsd25-baseboard.c +++ b/arch/arm/mach-imx/eukrea_mbimxsd25-baseboard.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include