From 8d00a6c8f6b08e7167bc03bf955cdc7e47c5132e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 22 Jul 2008 08:39:57 +0200 Subject: genirq: remove last NO_IDLE_HZ leftovers Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index 8ccb462ea42c..f3047df2d23c 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -197,10 +197,6 @@ extern int setup_irq(unsigned int irq, struct irqaction *new); #ifdef CONFIG_GENERIC_HARDIRQS -#ifndef handle_dynamic_tick -# define handle_dynamic_tick(a) do { } while (0) -#endif - #ifdef CONFIG_SMP #if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE) -- cgit v1.2.3 From feb2f55db45919aa80731f8877b60cab454b7b94 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 1 Aug 2008 11:53:29 +0300 Subject: [MTD] [OneNAND] Add defines for HF and sync write Signed-off-by: Adrian Hunter Signed-off-by: David Woodhouse --- include/linux/mtd/onenand_regs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mtd/onenand_regs.h b/include/linux/mtd/onenand_regs.h index d1b310c92eb4..0c6bbe28f38c 100644 --- a/include/linux/mtd/onenand_regs.h +++ b/include/linux/mtd/onenand_regs.h @@ -152,6 +152,8 @@ #define ONENAND_SYS_CFG1_INT (1 << 6) #define ONENAND_SYS_CFG1_IOBE (1 << 5) #define ONENAND_SYS_CFG1_RDY_CONF (1 << 4) +#define ONENAND_SYS_CFG1_HF (1 << 2) +#define ONENAND_SYS_CFG1_SYNC_WRITE (1 << 1) /* * Controller Status Register F240h (R) -- cgit v1.2.3 From 2e489e077a6ad118c4f247faedf330117b107cce Mon Sep 17 00:00:00 2001 From: Alexey Korolev Date: Tue, 5 Aug 2008 16:39:42 +0100 Subject: [MTD] [NOR] Add qry_mode_on()/qry_omde_off() to deal with odd chips There are some CFI chips which require non standard procedures to get into QRY mode. The possible way to support them would be trying different modes till QRY will be read. This patch introduce two new functions qry_mode_on qry_mode_off. qry_mode_on tries different commands in order switch chip into QRY mode. So if we have one more "odd" chip - we just could add several lines to qry_mode_on. Also using these functions remove unnecessary code duplicaton in porbe procedure. Currently there are two "odd" cases 1. Some old intel chips which require 0xFF before 0x98 2. ST M29DW chip which requires 0x98 to be sent at 0x555 (according to CFI should be 0x55) This patch is partialy based on the patch from Uwe (see "[PATCH 2/4] [RFC][MTD] cfi_probe: remove Intel chip workaround" thread ) Signed-off-by: Alexey Korolev Signed-off-by: Alexander Belyakov Signed-off-by: David Woodhouse --- drivers/mtd/chips/cfi_probe.c | 52 +++++------------------------------- drivers/mtd/chips/cfi_util.c | 62 ++++++++++++++++++++++++++++++++++++++++--- include/linux/mtd/cfi.h | 9 ++++++- 3 files changed, 73 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/drivers/mtd/chips/cfi_probe.c b/drivers/mtd/chips/cfi_probe.c index c418e92e1d92..e706be2ad0cb 100644 --- a/drivers/mtd/chips/cfi_probe.c +++ b/drivers/mtd/chips/cfi_probe.c @@ -44,17 +44,14 @@ do { \ #define xip_enable(base, map, cfi) \ do { \ - cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL); \ - cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL); \ + qry_mode_off(base, map, cfi); \ xip_allowed(base, map); \ } while (0) #define xip_disable_qry(base, map, cfi) \ do { \ xip_disable(); \ - cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL); \ - cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL); \ - cfi_send_gen_cmd(0x98, 0x55, base, map, cfi, cfi->device_type, NULL); \ + qry_mode_on(base, map, cfi); \ } while (0) #else @@ -70,32 +67,6 @@ do { \ in: interleave,type,mode ret: table index, <0 for error */ -static int __xipram qry_present(struct map_info *map, __u32 base, - struct cfi_private *cfi) -{ - int osf = cfi->interleave * cfi->device_type; // scale factor - map_word val[3]; - map_word qry[3]; - - qry[0] = cfi_build_cmd('Q', map, cfi); - qry[1] = cfi_build_cmd('R', map, cfi); - qry[2] = cfi_build_cmd('Y', map, cfi); - - val[0] = map_read(map, base + osf*0x10); - val[1] = map_read(map, base + osf*0x11); - val[2] = map_read(map, base + osf*0x12); - - if (!map_word_equal(map, qry[0], val[0])) - return 0; - - if (!map_word_equal(map, qry[1], val[1])) - return 0; - - if (!map_word_equal(map, qry[2], val[2])) - return 0; - - return 1; // "QRY" found -} static int __xipram cfi_probe_chip(struct map_info *map, __u32 base, unsigned long *chip_map, struct cfi_private *cfi) @@ -116,11 +87,7 @@ static int __xipram cfi_probe_chip(struct map_info *map, __u32 base, } xip_disable(); - cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL); - cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL); - cfi_send_gen_cmd(0x98, 0x55, base, map, cfi, cfi->device_type, NULL); - - if (!qry_present(map,base,cfi)) { + if (!qry_mode_on(base, map, cfi)) { xip_enable(base, map, cfi); return 0; } @@ -144,8 +111,7 @@ static int __xipram cfi_probe_chip(struct map_info *map, __u32 base, if (qry_present(map, start, cfi)) { /* Eep. This chip also had the QRY marker. * Is it an alias for the new one? */ - cfi_send_gen_cmd(0xF0, 0, start, map, cfi, cfi->device_type, NULL); - cfi_send_gen_cmd(0xFF, 0, start, map, cfi, cfi->device_type, NULL); + qry_mode_off(start, map, cfi); /* If the QRY marker goes away, it's an alias */ if (!qry_present(map, start, cfi)) { @@ -158,8 +124,7 @@ static int __xipram cfi_probe_chip(struct map_info *map, __u32 base, * unfortunate. Stick the new chip in read mode * too and if it's the same, assume it's an alias. */ /* FIXME: Use other modes to do a proper check */ - cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL); - cfi_send_gen_cmd(0xFF, 0, start, map, cfi, cfi->device_type, NULL); + qry_mode_off(base, map, cfi); if (qry_present(map, base, cfi)) { xip_allowed(base, map); @@ -176,8 +141,7 @@ static int __xipram cfi_probe_chip(struct map_info *map, __u32 base, cfi->numchips++; /* Put it back into Read Mode */ - cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL); - cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL); + qry_mode_off(base, map, cfi); xip_allowed(base, map); printk(KERN_INFO "%s: Found %d x%d devices at 0x%x in %d-bit bank\n", @@ -237,9 +201,7 @@ static int __xipram cfi_chip_setup(struct map_info *map, cfi_read_query(map, base + 0xf * ofs_factor); /* Put it back into Read Mode */ - cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL); - /* ... even if it's an Intel chip */ - cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL); + qry_mode_off(base, map, cfi); xip_allowed(base, map); /* Do any necessary byteswapping */ diff --git a/drivers/mtd/chips/cfi_util.c b/drivers/mtd/chips/cfi_util.c index 0ee457018016..8d7553670526 100644 --- a/drivers/mtd/chips/cfi_util.c +++ b/drivers/mtd/chips/cfi_util.c @@ -24,6 +24,62 @@ #include #include +int __xipram qry_present(struct map_info *map, __u32 base, + struct cfi_private *cfi) +{ + int osf = cfi->interleave * cfi->device_type; /* scale factor */ + map_word val[3]; + map_word qry[3]; + + qry[0] = cfi_build_cmd('Q', map, cfi); + qry[1] = cfi_build_cmd('R', map, cfi); + qry[2] = cfi_build_cmd('Y', map, cfi); + + val[0] = map_read(map, base + osf*0x10); + val[1] = map_read(map, base + osf*0x11); + val[2] = map_read(map, base + osf*0x12); + + if (!map_word_equal(map, qry[0], val[0])) + return 0; + + if (!map_word_equal(map, qry[1], val[1])) + return 0; + + if (!map_word_equal(map, qry[2], val[2])) + return 0; + + return 1; /* "QRY" found */ +} + +int __xipram qry_mode_on(uint32_t base, struct map_info *map, + struct cfi_private *cfi) +{ + cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL); + cfi_send_gen_cmd(0x98, 0x55, base, map, cfi, cfi->device_type, NULL); + if (qry_present(map, base, cfi)) + return 1; + /* QRY not found probably we deal with some odd CFI chips */ + /* Some revisions of some old Intel chips? */ + cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL); + cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL); + cfi_send_gen_cmd(0x98, 0x55, base, map, cfi, cfi->device_type, NULL); + if (qry_present(map, base, cfi)) + return 1; + /* ST M29DW chips */ + cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL); + cfi_send_gen_cmd(0x98, 0x555, base, map, cfi, cfi->device_type, NULL); + if (qry_present(map, base, cfi)) + return 1; + /* QRY not found */ + return 0; +} +void __xipram qry_mode_off(uint32_t base, struct map_info *map, + struct cfi_private *cfi) +{ + cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL); + cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL); +} + struct cfi_extquery * __xipram cfi_read_pri(struct map_info *map, __u16 adr, __u16 size, const char* name) { @@ -48,8 +104,7 @@ __xipram cfi_read_pri(struct map_info *map, __u16 adr, __u16 size, const char* n #endif /* Switch it into Query Mode */ - cfi_send_gen_cmd(0x98, 0x55, base, map, cfi, cfi->device_type, NULL); - + qry_mode_on(base, map, cfi); /* Read in the Extended Query Table */ for (i=0; idevice_type, NULL); - cfi_send_gen_cmd(0xff, 0, base, map, cfi, cfi->device_type, NULL); + qry_mode_off(base, map, cfi); #ifdef CONFIG_MTD_XIP (void) map_read(map, base); diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h index d6fb115f5a07..3058917d7b92 100644 --- a/include/linux/mtd/cfi.h +++ b/include/linux/mtd/cfi.h @@ -12,6 +12,7 @@ #include #include #include +#include #ifdef CONFIG_MTD_CFI_I1 #define cfi_interleave(cfi) 1 @@ -430,7 +431,6 @@ static inline uint32_t cfi_send_gen_cmd(u_char cmd, uint32_t cmd_addr, uint32_t { map_word val; uint32_t addr = base + cfi_build_cmd_addr(cmd_addr, cfi_interleave(cfi), type); - val = cfi_build_cmd(cmd, map, cfi); if (prev_val) @@ -483,6 +483,13 @@ static inline void cfi_udelay(int us) } } +int __xipram qry_present(struct map_info *map, __u32 base, + struct cfi_private *cfi); +int __xipram qry_mode_on(uint32_t base, struct map_info *map, + struct cfi_private *cfi); +void __xipram qry_mode_off(uint32_t base, struct map_info *map, + struct cfi_private *cfi); + struct cfi_extquery *cfi_read_pri(struct map_info *map, uint16_t adr, uint16_t size, const char* name); struct cfi_fixup { -- cgit v1.2.3 From e93cafe45fd74935e0aca2b79e533f0e3ed9640f Mon Sep 17 00:00:00 2001 From: Anders Grafström Date: Tue, 5 Aug 2008 18:37:41 +0200 Subject: [MTD] [NOR] cfi_cmdset_0001: Timeouts for erase, write and unlock operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Timeouts are currently given by the typical operation time times 8. It works in the general well-behaved case but not when an erase block is failing. For erase operations, it seems that a failing erase block will keep the device state machine in erasing state until the vendor specified maximum timeout period has passed. By this time the driver would have long since timed out, left erasing state and attempted further operations which all fail. This patch implements timeouts using values from the CFI Query structure when available. The patch also sets a longer timeout for locking operations. The current value used for locking/unlocking given by 1000000/HZ microseconds is too short for devices like J3 and J5 Strataflash which have a typical clear lock-bits time of 0.5 seconds. Signed-off-by: Anders Grafström Signed-off-by: David Woodhouse --- drivers/mtd/chips/cfi_cmdset_0001.c | 52 +++++++++++++++++++++++++++---------- include/linux/mtd/flashchip.h | 4 +++ 2 files changed, 42 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c index d49cbe2738a8..5157e3cb4b9e 100644 --- a/drivers/mtd/chips/cfi_cmdset_0001.c +++ b/drivers/mtd/chips/cfi_cmdset_0001.c @@ -478,6 +478,28 @@ struct mtd_info *cfi_cmdset_0001(struct map_info *map, int primary) else cfi->chips[i].erase_time = 2000000; + if (cfi->cfiq->WordWriteTimeoutTyp && + cfi->cfiq->WordWriteTimeoutMax) + cfi->chips[i].word_write_time_max = + 1<<(cfi->cfiq->WordWriteTimeoutTyp + + cfi->cfiq->WordWriteTimeoutMax); + else + cfi->chips[i].word_write_time_max = 50000 * 8; + + if (cfi->cfiq->BufWriteTimeoutTyp && + cfi->cfiq->BufWriteTimeoutMax) + cfi->chips[i].buffer_write_time_max = + 1<<(cfi->cfiq->BufWriteTimeoutTyp + + cfi->cfiq->BufWriteTimeoutMax); + + if (cfi->cfiq->BlockEraseTimeoutTyp && + cfi->cfiq->BlockEraseTimeoutMax) + cfi->chips[i].erase_time_max = + 1000<<(cfi->cfiq->BlockEraseTimeoutTyp + + cfi->cfiq->BlockEraseTimeoutMax); + else + cfi->chips[i].erase_time_max = 2000000 * 8; + cfi->chips[i].ref_point_counter = 0; init_waitqueue_head(&(cfi->chips[i].wq)); } @@ -1012,7 +1034,7 @@ static void __xipram xip_enable(struct map_info *map, struct flchip *chip, static int __xipram xip_wait_for_operation( struct map_info *map, struct flchip *chip, - unsigned long adr, unsigned int chip_op_time ) + unsigned long adr, unsigned int chip_op_time_max) { struct cfi_private *cfi = map->fldrv_priv; struct cfi_pri_intelext *cfip = cfi->cmdset_priv; @@ -1021,7 +1043,7 @@ static int __xipram xip_wait_for_operation( flstate_t oldstate, newstate; start = xip_currtime(); - usec = chip_op_time * 8; + usec = chip_op_time_max; if (usec == 0) usec = 500000; done = 0; @@ -1131,8 +1153,8 @@ static int __xipram xip_wait_for_operation( #define XIP_INVAL_CACHED_RANGE(map, from, size) \ INVALIDATE_CACHED_RANGE(map, from, size) -#define INVAL_CACHE_AND_WAIT(map, chip, cmd_adr, inval_adr, inval_len, usec) \ - xip_wait_for_operation(map, chip, cmd_adr, usec) +#define INVAL_CACHE_AND_WAIT(map, chip, cmd_adr, inval_adr, inval_len, usec, usec_max) \ + xip_wait_for_operation(map, chip, cmd_adr, usec_max) #else @@ -1144,7 +1166,7 @@ static int __xipram xip_wait_for_operation( static int inval_cache_and_wait_for_operation( struct map_info *map, struct flchip *chip, unsigned long cmd_adr, unsigned long inval_adr, int inval_len, - unsigned int chip_op_time) + unsigned int chip_op_time, unsigned int chip_op_time_max) { struct cfi_private *cfi = map->fldrv_priv; map_word status, status_OK = CMD(0x80); @@ -1156,8 +1178,7 @@ static int inval_cache_and_wait_for_operation( INVALIDATE_CACHED_RANGE(map, inval_adr, inval_len); spin_lock(chip->mutex); - /* set our timeout to 8 times the expected delay */ - timeo = chip_op_time * 8; + timeo = chip_op_time_max; if (!timeo) timeo = 500000; reset_timeo = timeo; @@ -1217,8 +1238,8 @@ static int inval_cache_and_wait_for_operation( #endif -#define WAIT_TIMEOUT(map, chip, adr, udelay) \ - INVAL_CACHE_AND_WAIT(map, chip, adr, 0, 0, udelay); +#define WAIT_TIMEOUT(map, chip, adr, udelay, udelay_max) \ + INVAL_CACHE_AND_WAIT(map, chip, adr, 0, 0, udelay, udelay_max); static int do_point_onechip (struct map_info *map, struct flchip *chip, loff_t adr, size_t len) @@ -1452,7 +1473,8 @@ static int __xipram do_write_oneword(struct map_info *map, struct flchip *chip, ret = INVAL_CACHE_AND_WAIT(map, chip, adr, adr, map_bankwidth(map), - chip->word_write_time); + chip->word_write_time, + chip->word_write_time_max); if (ret) { xip_enable(map, chip, adr); printk(KERN_ERR "%s: word write error (status timeout)\n", map->name); @@ -1623,7 +1645,7 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip, chip->state = FL_WRITING_TO_BUFFER; map_write(map, write_cmd, cmd_adr); - ret = WAIT_TIMEOUT(map, chip, cmd_adr, 0); + ret = WAIT_TIMEOUT(map, chip, cmd_adr, 0, 0); if (ret) { /* Argh. Not ready for write to buffer */ map_word Xstatus = map_read(map, cmd_adr); @@ -1692,7 +1714,8 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip, ret = INVAL_CACHE_AND_WAIT(map, chip, cmd_adr, initial_adr, initial_len, - chip->buffer_write_time); + chip->buffer_write_time, + chip->buffer_write_time_max); if (ret) { map_write(map, CMD(0x70), cmd_adr); chip->state = FL_STATUS; @@ -1827,7 +1850,8 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip, ret = INVAL_CACHE_AND_WAIT(map, chip, adr, adr, len, - chip->erase_time); + chip->erase_time, + chip->erase_time_max); if (ret) { map_write(map, CMD(0x70), adr); chip->state = FL_STATUS; @@ -2006,7 +2030,7 @@ static int __xipram do_xxlock_oneblock(struct map_info *map, struct flchip *chip */ udelay = (!extp || !(extp->FeatureSupport & (1 << 5))) ? 1000000/HZ : 0; - ret = WAIT_TIMEOUT(map, chip, adr, udelay); + ret = WAIT_TIMEOUT(map, chip, adr, udelay, udelay * 100); if (ret) { map_write(map, CMD(0x70), adr); chip->state = FL_STATUS; diff --git a/include/linux/mtd/flashchip.h b/include/linux/mtd/flashchip.h index 08dd131301c1..d4f38c5fd44e 100644 --- a/include/linux/mtd/flashchip.h +++ b/include/linux/mtd/flashchip.h @@ -73,6 +73,10 @@ struct flchip { int buffer_write_time; int erase_time; + int word_write_time_max; + int buffer_write_time_max; + int erase_time_max; + void *priv; }; -- cgit v1.2.3 From c314dfdc358847eef0fc07ec8682e1acc8cadd00 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 7 Aug 2008 11:55:07 +0100 Subject: [MTD] [NOR] Rename and export new cfi_qry_*() functions They need to be exported, so let's give them less generic-sounding names while we're at it. Original export patch, along with the suggestion about the nomenclature, from Stephen Rothwell. Signed-off-by: David Woodhouse --- drivers/mtd/chips/cfi_probe.c | 20 ++++++++++---------- drivers/mtd/chips/cfi_util.c | 26 +++++++++++++++----------- include/linux/mtd/cfi.h | 12 ++++++------ 3 files changed, 31 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/drivers/mtd/chips/cfi_probe.c b/drivers/mtd/chips/cfi_probe.c index e706be2ad0cb..e63e6749429a 100644 --- a/drivers/mtd/chips/cfi_probe.c +++ b/drivers/mtd/chips/cfi_probe.c @@ -44,14 +44,14 @@ do { \ #define xip_enable(base, map, cfi) \ do { \ - qry_mode_off(base, map, cfi); \ + cfi_qry_mode_off(base, map, cfi); \ xip_allowed(base, map); \ } while (0) #define xip_disable_qry(base, map, cfi) \ do { \ xip_disable(); \ - qry_mode_on(base, map, cfi); \ + cfi_qry_mode_on(base, map, cfi); \ } while (0) #else @@ -87,7 +87,7 @@ static int __xipram cfi_probe_chip(struct map_info *map, __u32 base, } xip_disable(); - if (!qry_mode_on(base, map, cfi)) { + if (!cfi_qry_mode_on(base, map, cfi)) { xip_enable(base, map, cfi); return 0; } @@ -108,13 +108,13 @@ static int __xipram cfi_probe_chip(struct map_info *map, __u32 base, start = i << cfi->chipshift; /* This chip should be in read mode if it's one we've already touched. */ - if (qry_present(map, start, cfi)) { + if (cfi_qry_present(map, start, cfi)) { /* Eep. This chip also had the QRY marker. * Is it an alias for the new one? */ - qry_mode_off(start, map, cfi); + cfi_qry_mode_off(start, map, cfi); /* If the QRY marker goes away, it's an alias */ - if (!qry_present(map, start, cfi)) { + if (!cfi_qry_present(map, start, cfi)) { xip_allowed(base, map); printk(KERN_DEBUG "%s: Found an alias at 0x%x for the chip at 0x%lx\n", map->name, base, start); @@ -124,9 +124,9 @@ static int __xipram cfi_probe_chip(struct map_info *map, __u32 base, * unfortunate. Stick the new chip in read mode * too and if it's the same, assume it's an alias. */ /* FIXME: Use other modes to do a proper check */ - qry_mode_off(base, map, cfi); + cfi_qry_mode_off(base, map, cfi); - if (qry_present(map, base, cfi)) { + if (cfi_qry_present(map, base, cfi)) { xip_allowed(base, map); printk(KERN_DEBUG "%s: Found an alias at 0x%x for the chip at 0x%lx\n", map->name, base, start); @@ -141,7 +141,7 @@ static int __xipram cfi_probe_chip(struct map_info *map, __u32 base, cfi->numchips++; /* Put it back into Read Mode */ - qry_mode_off(base, map, cfi); + cfi_qry_mode_off(base, map, cfi); xip_allowed(base, map); printk(KERN_INFO "%s: Found %d x%d devices at 0x%x in %d-bit bank\n", @@ -201,7 +201,7 @@ static int __xipram cfi_chip_setup(struct map_info *map, cfi_read_query(map, base + 0xf * ofs_factor); /* Put it back into Read Mode */ - qry_mode_off(base, map, cfi); + cfi_qry_mode_off(base, map, cfi); xip_allowed(base, map); /* Do any necessary byteswapping */ diff --git a/drivers/mtd/chips/cfi_util.c b/drivers/mtd/chips/cfi_util.c index 8d7553670526..34d40e25d312 100644 --- a/drivers/mtd/chips/cfi_util.c +++ b/drivers/mtd/chips/cfi_util.c @@ -24,8 +24,8 @@ #include #include -int __xipram qry_present(struct map_info *map, __u32 base, - struct cfi_private *cfi) +int __xipram cfi_qry_present(struct map_info *map, __u32 base, + struct cfi_private *cfi) { int osf = cfi->interleave * cfi->device_type; /* scale factor */ map_word val[3]; @@ -50,35 +50,39 @@ int __xipram qry_present(struct map_info *map, __u32 base, return 1; /* "QRY" found */ } +EXPORT_SYMBOL_GPL(cfi_qry_present); -int __xipram qry_mode_on(uint32_t base, struct map_info *map, - struct cfi_private *cfi) +int __xipram cfi_qry_mode_on(uint32_t base, struct map_info *map, + struct cfi_private *cfi) { cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL); cfi_send_gen_cmd(0x98, 0x55, base, map, cfi, cfi->device_type, NULL); - if (qry_present(map, base, cfi)) + if (cfi_qry_present(map, base, cfi)) return 1; /* QRY not found probably we deal with some odd CFI chips */ /* Some revisions of some old Intel chips? */ cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL); cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL); cfi_send_gen_cmd(0x98, 0x55, base, map, cfi, cfi->device_type, NULL); - if (qry_present(map, base, cfi)) + if (cfi_qry_present(map, base, cfi)) return 1; /* ST M29DW chips */ cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL); cfi_send_gen_cmd(0x98, 0x555, base, map, cfi, cfi->device_type, NULL); - if (qry_present(map, base, cfi)) + if (cfi_qry_present(map, base, cfi)) return 1; /* QRY not found */ return 0; } -void __xipram qry_mode_off(uint32_t base, struct map_info *map, - struct cfi_private *cfi) +EXPORT_SYMBOL_GPL(cfi_qry_mode_on); + +void __xipram cfi_qry_mode_off(uint32_t base, struct map_info *map, + struct cfi_private *cfi) { cfi_send_gen_cmd(0xF0, 0, base, map, cfi, cfi->device_type, NULL); cfi_send_gen_cmd(0xFF, 0, base, map, cfi, cfi->device_type, NULL); } +EXPORT_SYMBOL_GPL(cfi_qry_mode_off); struct cfi_extquery * __xipram cfi_read_pri(struct map_info *map, __u16 adr, __u16 size, const char* name) @@ -104,7 +108,7 @@ __xipram cfi_read_pri(struct map_info *map, __u16 adr, __u16 size, const char* n #endif /* Switch it into Query Mode */ - qry_mode_on(base, map, cfi); + cfi_qry_mode_on(base, map, cfi); /* Read in the Extended Query Table */ for (i=0; i Date: Tue, 12 Aug 2008 12:40:50 +0300 Subject: [MTD] Define and use MTD_FAIL_ADDR_UNKNOWN instead of 0xffffffff Signed-off-by: Adrian Hunter Signed-off-by: David Woodhouse --- drivers/mtd/mtdconcat.c | 4 ++-- drivers/mtd/mtdpart.c | 4 ++-- drivers/mtd/nand/nand_base.c | 2 +- drivers/mtd/onenand/onenand_base.c | 2 +- fs/jffs2/erase.c | 4 ++-- include/linux/mtd/mtd.h | 4 +++- 6 files changed, 11 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c index 2972a5edb73d..789842d0e6f2 100644 --- a/drivers/mtd/mtdconcat.c +++ b/drivers/mtd/mtdconcat.c @@ -444,7 +444,7 @@ static int concat_erase(struct mtd_info *mtd, struct erase_info *instr) return -EINVAL; } - instr->fail_addr = 0xffffffff; + instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN; /* make a local copy of instr to avoid modifying the caller's struct */ erase = kmalloc(sizeof (struct erase_info), GFP_KERNEL); @@ -493,7 +493,7 @@ static int concat_erase(struct mtd_info *mtd, struct erase_info *instr) /* sanity check: should never happen since * block alignment has been checked above */ BUG_ON(err == -EINVAL); - if (erase->fail_addr != 0xffffffff) + if (erase->fail_addr != MTD_FAIL_ADDR_UNKNOWN) instr->fail_addr = erase->fail_addr + offset; break; } diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c index edb90b58a9b1..8e77e36e75ee 100644 --- a/drivers/mtd/mtdpart.c +++ b/drivers/mtd/mtdpart.c @@ -214,7 +214,7 @@ static int part_erase(struct mtd_info *mtd, struct erase_info *instr) instr->addr += part->offset; ret = part->master->erase(part->master, instr); if (ret) { - if (instr->fail_addr != 0xffffffff) + if (instr->fail_addr != MTD_FAIL_ADDR_UNKNOWN) instr->fail_addr -= part->offset; instr->addr -= part->offset; } @@ -226,7 +226,7 @@ void mtd_erase_callback(struct erase_info *instr) if (instr->mtd->erase == part_erase) { struct mtd_part *part = PART(instr->mtd); - if (instr->fail_addr != 0xffffffff) + if (instr->fail_addr != MTD_FAIL_ADDR_UNKNOWN) instr->fail_addr -= part->offset; instr->addr -= part->offset; } diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index d1129bae6c27..582280560c89 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -2042,7 +2042,7 @@ int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr, return -EINVAL; } - instr->fail_addr = 0xffffffff; + instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN; /* Grab the lock and see if the device is available */ nand_get_device(chip, mtd, FL_ERASING); diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c index 926cf3a4135d..90ed319f26e6 100644 --- a/drivers/mtd/onenand/onenand_base.c +++ b/drivers/mtd/onenand/onenand_base.c @@ -1794,7 +1794,7 @@ static int onenand_erase(struct mtd_info *mtd, struct erase_info *instr) return -EINVAL; } - instr->fail_addr = 0xffffffff; + instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN; /* Grab the lock and see if the device is available */ onenand_get_device(mtd, FL_ERASING); diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c index dddb2a6c9e2c..259461b910af 100644 --- a/fs/jffs2/erase.c +++ b/fs/jffs2/erase.c @@ -68,7 +68,7 @@ static void jffs2_erase_block(struct jffs2_sb_info *c, instr->len = c->sector_size; instr->callback = jffs2_erase_callback; instr->priv = (unsigned long)(&instr[1]); - instr->fail_addr = 0xffffffff; + instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN; ((struct erase_priv_struct *)instr->priv)->jeb = jeb; ((struct erase_priv_struct *)instr->priv)->c = c; @@ -175,7 +175,7 @@ static void jffs2_erase_failed(struct jffs2_sb_info *c, struct jffs2_eraseblock { /* For NAND, if the failure did not occur at the device level for a specific physical page, don't bother updating the bad block table. */ - if (jffs2_cleanmarker_oob(c) && (bad_offset != 0xffffffff)) { + if (jffs2_cleanmarker_oob(c) && (bad_offset != MTD_FAIL_ADDR_UNKNOWN)) { /* We had a device-level failure to erase. Let's see if we've failed too many times. */ if (!jffs2_write_nand_badblock(c, jeb, bad_offset)) { diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 922636548558..eae26bb6430a 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -25,8 +25,10 @@ #define MTD_ERASE_DONE 0x08 #define MTD_ERASE_FAILED 0x10 +#define MTD_FAIL_ADDR_UNKNOWN 0xffffffff + /* If the erase fails, fail_addr might indicate exactly which block failed. If - fail_addr = 0xffffffff, the failure was not at the device level or was not + fail_addr = MTD_FAIL_ADDR_UNKNOWN, the failure was not at the device level or was not specific to any particular block. */ struct erase_info { struct mtd_info *mtd; -- cgit v1.2.3 From 36cd4fb5d277f34fe9e4db0deac2d4efd7dff735 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 6 Aug 2008 10:08:46 +0300 Subject: [MTD] [OneNAND] Add OMAP2 / OMAP3 OneNAND driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This driver had resided in the OMAP tree but is now to be in MTD. Original authors were: Jarkko Lavinen and Juha Yrjölä IRQ and DMA support written by Timo Teras Signed-off-by: Adrian Hunter Signed-off-by: David Woodhouse --- drivers/mtd/onenand/Kconfig | 7 + drivers/mtd/onenand/Makefile | 1 + drivers/mtd/onenand/omap2.c | 777 ++++++++++++++++++++++++++++++++++++ include/asm-arm/arch-omap/onenand.h | 7 +- 4 files changed, 791 insertions(+), 1 deletion(-) create mode 100644 drivers/mtd/onenand/omap2.c (limited to 'include') diff --git a/drivers/mtd/onenand/Kconfig b/drivers/mtd/onenand/Kconfig index b94a61b670d1..79fa79e8f8de 100644 --- a/drivers/mtd/onenand/Kconfig +++ b/drivers/mtd/onenand/Kconfig @@ -27,6 +27,13 @@ config MTD_ONENAND_GENERIC help Support for OneNAND flash via platform device driver. +config MTD_ONENAND_OMAP2 + tristate "OneNAND on OMAP2/OMAP3 support" + depends on MTD_ONENAND && (ARCH_OMAP2 || ARCH_OMAP3) + help + Support for a OneNAND flash device connected to an OMAP2/OMAP3 CPU + via the GPMC memory controller. + config MTD_ONENAND_OTP bool "OneNAND OTP Support" select HAVE_MTD_OTP diff --git a/drivers/mtd/onenand/Makefile b/drivers/mtd/onenand/Makefile index 4d2eacfd7e11..64b6cc61a520 100644 --- a/drivers/mtd/onenand/Makefile +++ b/drivers/mtd/onenand/Makefile @@ -7,6 +7,7 @@ obj-$(CONFIG_MTD_ONENAND) += onenand.o # Board specific. obj-$(CONFIG_MTD_ONENAND_GENERIC) += generic.o +obj-$(CONFIG_MTD_ONENAND_OMAP2) += omap2.o # Simulator obj-$(CONFIG_MTD_ONENAND_SIM) += onenand_sim.o diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c new file mode 100644 index 000000000000..40153ace5df6 --- /dev/null +++ b/drivers/mtd/onenand/omap2.c @@ -0,0 +1,777 @@ +/* + * linux/drivers/mtd/onenand/omap2.c + * + * OneNAND driver for OMAP2 / OMAP3 + * + * Copyright © 2005-2006 Nokia Corporation + * + * Author: Jarkko Lavinen and Juha Yrjölä + * IRQ and DMA support written by Timo Teras + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; see the file COPYING. If not, write to the Free Software + * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#define DRIVER_NAME "omap2-onenand" + +#define ONENAND_IO_SIZE SZ_128K +#define ONENAND_BUFRAM_SIZE (1024 * 5) + +struct omap2_onenand { + struct platform_device *pdev; + int gpmc_cs; + unsigned long phys_base; + int gpio_irq; + struct mtd_info mtd; + struct mtd_partition *parts; + struct onenand_chip onenand; + struct completion irq_done; + struct completion dma_done; + int dma_channel; + int freq; + int (*setup)(void __iomem *base, int freq); +}; + +static void omap2_onenand_dma_cb(int lch, u16 ch_status, void *data) +{ + struct omap2_onenand *c = data; + + complete(&c->dma_done); +} + +static irqreturn_t omap2_onenand_interrupt(int irq, void *dev_id) +{ + struct omap2_onenand *c = dev_id; + + complete(&c->irq_done); + + return IRQ_HANDLED; +} + +static inline unsigned short read_reg(struct omap2_onenand *c, int reg) +{ + return readw(c->onenand.base + reg); +} + +static inline void write_reg(struct omap2_onenand *c, unsigned short value, + int reg) +{ + writew(value, c->onenand.base + reg); +} + +static void wait_err(char *msg, int state, unsigned int ctrl, unsigned int intr) +{ + printk(KERN_ERR "onenand_wait: %s! state %d ctrl 0x%04x intr 0x%04x\n", + msg, state, ctrl, intr); +} + +static void wait_warn(char *msg, int state, unsigned int ctrl, + unsigned int intr) +{ + printk(KERN_WARNING "onenand_wait: %s! state %d ctrl 0x%04x " + "intr 0x%04x\n", msg, state, ctrl, intr); +} + +static int omap2_onenand_wait(struct mtd_info *mtd, int state) +{ + struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd); + unsigned int intr = 0; + unsigned int ctrl; + unsigned long timeout; + u32 syscfg; + + if (state == FL_RESETING) { + int i; + + for (i = 0; i < 20; i++) { + udelay(1); + intr = read_reg(c, ONENAND_REG_INTERRUPT); + if (intr & ONENAND_INT_MASTER) + break; + } + ctrl = read_reg(c, ONENAND_REG_CTRL_STATUS); + if (ctrl & ONENAND_CTRL_ERROR) { + wait_err("controller error", state, ctrl, intr); + return -EIO; + } + if (!(intr & ONENAND_INT_RESET)) { + wait_err("timeout", state, ctrl, intr); + return -EIO; + } + return 0; + } + + if (state != FL_READING) { + int result; + + /* Turn interrupts on */ + syscfg = read_reg(c, ONENAND_REG_SYS_CFG1); + syscfg |= ONENAND_SYS_CFG1_IOBE; + write_reg(c, syscfg, ONENAND_REG_SYS_CFG1); + + INIT_COMPLETION(c->irq_done); + if (c->gpio_irq) { + result = omap_get_gpio_datain(c->gpio_irq); + if (result == -1) { + ctrl = read_reg(c, ONENAND_REG_CTRL_STATUS); + intr = read_reg(c, ONENAND_REG_INTERRUPT); + wait_err("gpio error", state, ctrl, intr); + return -EIO; + } + } else + result = 0; + if (result == 0) { + int retry_cnt = 0; +retry: + result = wait_for_completion_timeout(&c->irq_done, + msecs_to_jiffies(20)); + if (result == 0) { + /* Timeout after 20ms */ + ctrl = read_reg(c, ONENAND_REG_CTRL_STATUS); + if (ctrl & ONENAND_CTRL_ONGO) { + /* + * The operation seems to be still going + * so give it some more time. + */ + retry_cnt += 1; + if (retry_cnt < 3) + goto retry; + intr = read_reg(c, + ONENAND_REG_INTERRUPT); + wait_err("timeout", state, ctrl, intr); + return -EIO; + } + intr = read_reg(c, ONENAND_REG_INTERRUPT); + if ((intr & ONENAND_INT_MASTER) == 0) + wait_warn("timeout", state, ctrl, intr); + } + } + } else { + /* Turn interrupts off */ + syscfg = read_reg(c, ONENAND_REG_SYS_CFG1); + syscfg &= ~ONENAND_SYS_CFG1_IOBE; + write_reg(c, syscfg, ONENAND_REG_SYS_CFG1); + + timeout = jiffies + msecs_to_jiffies(20); + while (time_before(jiffies, timeout)) { + intr = read_reg(c, ONENAND_REG_INTERRUPT); + if (intr & ONENAND_INT_MASTER) + break; + } + } + + intr = read_reg(c, ONENAND_REG_INTERRUPT); + ctrl = read_reg(c, ONENAND_REG_CTRL_STATUS); + + if (intr & ONENAND_INT_READ) { + int ecc = read_reg(c, ONENAND_REG_ECC_STATUS); + + if (ecc) { + unsigned int addr1, addr8; + + addr1 = read_reg(c, ONENAND_REG_START_ADDRESS1); + addr8 = read_reg(c, ONENAND_REG_START_ADDRESS8); + if (ecc & ONENAND_ECC_2BIT_ALL) { + printk(KERN_ERR "onenand_wait: ECC error = " + "0x%04x, addr1 %#x, addr8 %#x\n", + ecc, addr1, addr8); + mtd->ecc_stats.failed++; + return -EBADMSG; + } else if (ecc & ONENAND_ECC_1BIT_ALL) { + printk(KERN_NOTICE "onenand_wait: correctable " + "ECC error = 0x%04x, addr1 %#x, " + "addr8 %#x\n", ecc, addr1, addr8); + mtd->ecc_stats.corrected++; + } + } + } else if (state == FL_READING) { + wait_err("timeout", state, ctrl, intr); + return -EIO; + } + + if (ctrl & ONENAND_CTRL_ERROR) { + wait_err("controller error", state, ctrl, intr); + if (ctrl & ONENAND_CTRL_LOCK) + printk(KERN_ERR "onenand_wait: " + "Device is write protected!!!\n"); + return -EIO; + } + + if (ctrl & 0xFE9F) + wait_warn("unexpected controller status", state, ctrl, intr); + + return 0; +} + +static inline int omap2_onenand_bufferram_offset(struct mtd_info *mtd, int area) +{ + struct onenand_chip *this = mtd->priv; + + if (ONENAND_CURRENT_BUFFERRAM(this)) { + if (area == ONENAND_DATARAM) + return mtd->writesize; + if (area == ONENAND_SPARERAM) + return mtd->oobsize; + } + + return 0; +} + +#if defined(CONFIG_ARCH_OMAP3) || defined(MULTI_OMAP2) + +static int omap3_onenand_read_bufferram(struct mtd_info *mtd, int area, + unsigned char *buffer, int offset, + size_t count) +{ + struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd); + struct onenand_chip *this = mtd->priv; + dma_addr_t dma_src, dma_dst; + int bram_offset; + unsigned long timeout; + void *buf = (void *)buffer; + size_t xtra; + volatile unsigned *done; + + bram_offset = omap2_onenand_bufferram_offset(mtd, area) + area + offset; + if (bram_offset & 3 || (size_t)buf & 3 || count < 384) + goto out_copy; + + if (buf >= high_memory) { + struct page *p1; + + if (((size_t)buf & PAGE_MASK) != + ((size_t)(buf + count - 1) & PAGE_MASK)) + goto out_copy; + p1 = vmalloc_to_page(buf); + if (!p1) + goto out_copy; + buf = page_address(p1) + ((size_t)buf & ~PAGE_MASK); + } + + xtra = count & 3; + if (xtra) { + count -= xtra; + memcpy(buf + count, this->base + bram_offset + count, xtra); + } + + dma_src = c->phys_base + bram_offset; + dma_dst = dma_map_single(&c->pdev->dev, buf, count, DMA_FROM_DEVICE); + if (dma_mapping_error(&c->pdev->dev, dma_dst)) { + dev_err(&c->pdev->dev, + "Couldn't DMA map a %d byte buffer\n", + count); + goto out_copy; + } + + omap_set_dma_transfer_params(c->dma_channel, OMAP_DMA_DATA_TYPE_S32, + count >> 2, 1, 0, 0, 0); + omap_set_dma_src_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC, + dma_src, 0, 0); + omap_set_dma_dest_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC, + dma_dst, 0, 0); + + INIT_COMPLETION(c->dma_done); + omap_start_dma(c->dma_channel); + + timeout = jiffies + msecs_to_jiffies(20); + done = &c->dma_done.done; + while (time_before(jiffies, timeout)) + if (*done) + break; + + dma_unmap_single(&c->pdev->dev, dma_dst, count, DMA_FROM_DEVICE); + + if (!*done) { + dev_err(&c->pdev->dev, "timeout waiting for DMA\n"); + goto out_copy; + } + + return 0; + +out_copy: + memcpy(buf, this->base + bram_offset, count); + return 0; +} + +static int omap3_onenand_write_bufferram(struct mtd_info *mtd, int area, + const unsigned char *buffer, + int offset, size_t count) +{ + struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd); + struct onenand_chip *this = mtd->priv; + dma_addr_t dma_src, dma_dst; + int bram_offset; + unsigned long timeout; + void *buf = (void *)buffer; + volatile unsigned *done; + + bram_offset = omap2_onenand_bufferram_offset(mtd, area) + area + offset; + if (bram_offset & 3 || (size_t)buf & 3 || count < 384) + goto out_copy; + + /* panic_write() may be in an interrupt context */ + if (in_interrupt()) + goto out_copy; + + if (buf >= high_memory) { + struct page *p1; + + if (((size_t)buf & PAGE_MASK) != + ((size_t)(buf + count - 1) & PAGE_MASK)) + goto out_copy; + p1 = vmalloc_to_page(buf); + if (!p1) + goto out_copy; + buf = page_address(p1) + ((size_t)buf & ~PAGE_MASK); + } + + dma_src = dma_map_single(&c->pdev->dev, buf, count, DMA_TO_DEVICE); + dma_dst = c->phys_base + bram_offset; + if (dma_mapping_error(&c->pdev->dev, dma_dst)) { + dev_err(&c->pdev->dev, + "Couldn't DMA map a %d byte buffer\n", + count); + return -1; + } + + omap_set_dma_transfer_params(c->dma_channel, OMAP_DMA_DATA_TYPE_S32, + count >> 2, 1, 0, 0, 0); + omap_set_dma_src_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC, + dma_src, 0, 0); + omap_set_dma_dest_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC, + dma_dst, 0, 0); + + INIT_COMPLETION(c->dma_done); + omap_start_dma(c->dma_channel); + + timeout = jiffies + msecs_to_jiffies(20); + done = &c->dma_done.done; + while (time_before(jiffies, timeout)) + if (*done) + break; + + dma_unmap_single(&c->pdev->dev, dma_dst, count, DMA_TO_DEVICE); + + if (!*done) { + dev_err(&c->pdev->dev, "timeout waiting for DMA\n"); + goto out_copy; + } + + return 0; + +out_copy: + memcpy(this->base + bram_offset, buf, count); + return 0; +} + +#else + +int omap3_onenand_read_bufferram(struct mtd_info *mtd, int area, + unsigned char *buffer, int offset, + size_t count); + +int omap3_onenand_write_bufferram(struct mtd_info *mtd, int area, + const unsigned char *buffer, + int offset, size_t count); + +#endif + +#if defined(CONFIG_ARCH_OMAP2) || defined(MULTI_OMAP2) + +static int omap2_onenand_read_bufferram(struct mtd_info *mtd, int area, + unsigned char *buffer, int offset, + size_t count) +{ + struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd); + struct onenand_chip *this = mtd->priv; + dma_addr_t dma_src, dma_dst; + int bram_offset; + + bram_offset = omap2_onenand_bufferram_offset(mtd, area) + area + offset; + /* DMA is not used. Revisit PM requirements before enabling it. */ + if (1 || (c->dma_channel < 0) || + ((void *) buffer >= (void *) high_memory) || (bram_offset & 3) || + (((unsigned int) buffer) & 3) || (count < 1024) || (count & 3)) { + memcpy(buffer, (__force void *)(this->base + bram_offset), + count); + return 0; + } + + dma_src = c->phys_base + bram_offset; + dma_dst = dma_map_single(&c->pdev->dev, buffer, count, + DMA_FROM_DEVICE); + if (dma_mapping_error(&c->pdev->dev, dma_dst)) { + dev_err(&c->pdev->dev, + "Couldn't DMA map a %d byte buffer\n", + count); + return -1; + } + + omap_set_dma_transfer_params(c->dma_channel, OMAP_DMA_DATA_TYPE_S32, + count / 4, 1, 0, 0, 0); + omap_set_dma_src_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC, + dma_src, 0, 0); + omap_set_dma_dest_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC, + dma_dst, 0, 0); + + INIT_COMPLETION(c->dma_done); + omap_start_dma(c->dma_channel); + wait_for_completion(&c->dma_done); + + dma_unmap_single(&c->pdev->dev, dma_dst, count, DMA_FROM_DEVICE); + + return 0; +} + +static int omap2_onenand_write_bufferram(struct mtd_info *mtd, int area, + const unsigned char *buffer, + int offset, size_t count) +{ + struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd); + struct onenand_chip *this = mtd->priv; + dma_addr_t dma_src, dma_dst; + int bram_offset; + + bram_offset = omap2_onenand_bufferram_offset(mtd, area) + area + offset; + /* DMA is not used. Revisit PM requirements before enabling it. */ + if (1 || (c->dma_channel < 0) || + ((void *) buffer >= (void *) high_memory) || (bram_offset & 3) || + (((unsigned int) buffer) & 3) || (count < 1024) || (count & 3)) { + memcpy((__force void *)(this->base + bram_offset), buffer, + count); + return 0; + } + + dma_src = dma_map_single(&c->pdev->dev, (void *) buffer, count, + DMA_TO_DEVICE); + dma_dst = c->phys_base + bram_offset; + if (dma_mapping_error(&c->pdev->dev, dma_dst)) { + dev_err(&c->pdev->dev, + "Couldn't DMA map a %d byte buffer\n", + count); + return -1; + } + + omap_set_dma_transfer_params(c->dma_channel, OMAP_DMA_DATA_TYPE_S16, + count / 2, 1, 0, 0, 0); + omap_set_dma_src_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC, + dma_src, 0, 0); + omap_set_dma_dest_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC, + dma_dst, 0, 0); + + INIT_COMPLETION(c->dma_done); + omap_start_dma(c->dma_channel); + wait_for_completion(&c->dma_done); + + dma_unmap_single(&c->pdev->dev, dma_dst, count, DMA_TO_DEVICE); + + return 0; +} + +#else + +int omap2_onenand_read_bufferram(struct mtd_info *mtd, int area, + unsigned char *buffer, int offset, + size_t count); + +int omap2_onenand_write_bufferram(struct mtd_info *mtd, int area, + const unsigned char *buffer, + int offset, size_t count); + +#endif + +static struct platform_driver omap2_onenand_driver; + +static int __adjust_timing(struct device *dev, void *data) +{ + int ret = 0; + struct omap2_onenand *c; + + c = dev_get_drvdata(dev); + + BUG_ON(c->setup == NULL); + + /* DMA is not in use so this is all that is needed */ + /* Revisit for OMAP3! */ + ret = c->setup(c->onenand.base, c->freq); + + return ret; +} + +int omap2_onenand_rephase(void) +{ + return driver_for_each_device(&omap2_onenand_driver.driver, NULL, + NULL, __adjust_timing); +} + +static void __devexit omap2_onenand_shutdown(struct platform_device *pdev) +{ + struct omap2_onenand *c = dev_get_drvdata(&pdev->dev); + + /* With certain content in the buffer RAM, the OMAP boot ROM code + * can recognize the flash chip incorrectly. Zero it out before + * soft reset. + */ + memset((__force void *)c->onenand.base, 0, ONENAND_BUFRAM_SIZE); +} + +static int __devinit omap2_onenand_probe(struct platform_device *pdev) +{ + struct omap_onenand_platform_data *pdata; + struct omap2_onenand *c; + int r; + + pdata = pdev->dev.platform_data; + if (pdata == NULL) { + dev_err(&pdev->dev, "platform data missing\n"); + return -ENODEV; + } + + c = kzalloc(sizeof(struct omap2_onenand), GFP_KERNEL); + if (!c) + return -ENOMEM; + + init_completion(&c->irq_done); + init_completion(&c->dma_done); + c->gpmc_cs = pdata->cs; + c->gpio_irq = pdata->gpio_irq; + c->dma_channel = pdata->dma_channel; + if (c->dma_channel < 0) { + /* if -1, don't use DMA */ + c->gpio_irq = 0; + } + + r = gpmc_cs_request(c->gpmc_cs, ONENAND_IO_SIZE, &c->phys_base); + if (r < 0) { + dev_err(&pdev->dev, "Cannot request GPMC CS\n"); + goto err_kfree; + } + + if (request_mem_region(c->phys_base, ONENAND_IO_SIZE, + pdev->dev.driver->name) == NULL) { + dev_err(&pdev->dev, "Cannot reserve memory region at 0x%08lx, " + "size: 0x%x\n", c->phys_base, ONENAND_IO_SIZE); + r = -EBUSY; + goto err_free_cs; + } + c->onenand.base = ioremap(c->phys_base, ONENAND_IO_SIZE); + if (c->onenand.base == NULL) { + r = -ENOMEM; + goto err_release_mem_region; + } + + if (pdata->onenand_setup != NULL) { + r = pdata->onenand_setup(c->onenand.base, c->freq); + if (r < 0) { + dev_err(&pdev->dev, "Onenand platform setup failed: " + "%d\n", r); + goto err_iounmap; + } + c->setup = pdata->onenand_setup; + } + + if (c->gpio_irq) { + if ((r = omap_request_gpio(c->gpio_irq)) < 0) { + dev_err(&pdev->dev, "Failed to request GPIO%d for " + "OneNAND\n", c->gpio_irq); + goto err_iounmap; + } + omap_set_gpio_direction(c->gpio_irq, 1); + + if ((r = request_irq(OMAP_GPIO_IRQ(c->gpio_irq), + omap2_onenand_interrupt, IRQF_TRIGGER_RISING, + pdev->dev.driver->name, c)) < 0) + goto err_release_gpio; + } + + if (c->dma_channel >= 0) { + r = omap_request_dma(0, pdev->dev.driver->name, + omap2_onenand_dma_cb, (void *) c, + &c->dma_channel); + if (r == 0) { + omap_set_dma_write_mode(c->dma_channel, + OMAP_DMA_WRITE_NON_POSTED); + omap_set_dma_src_data_pack(c->dma_channel, 1); + omap_set_dma_src_burst_mode(c->dma_channel, + OMAP_DMA_DATA_BURST_8); + omap_set_dma_dest_data_pack(c->dma_channel, 1); + omap_set_dma_dest_burst_mode(c->dma_channel, + OMAP_DMA_DATA_BURST_8); + } else { + dev_info(&pdev->dev, + "failed to allocate DMA for OneNAND, " + "using PIO instead\n"); + c->dma_channel = -1; + } + } + + dev_info(&pdev->dev, "initializing on CS%d, phys base 0x%08lx, virtual " + "base %p\n", c->gpmc_cs, c->phys_base, + c->onenand.base); + + c->pdev = pdev; + c->mtd.name = pdev->dev.bus_id; + c->mtd.priv = &c->onenand; + c->mtd.owner = THIS_MODULE; + + if (c->dma_channel >= 0) { + struct onenand_chip *this = &c->onenand; + + this->wait = omap2_onenand_wait; + if (cpu_is_omap34xx()) { + this->read_bufferram = omap3_onenand_read_bufferram; + this->write_bufferram = omap3_onenand_write_bufferram; + } else { + this->read_bufferram = omap2_onenand_read_bufferram; + this->write_bufferram = omap2_onenand_write_bufferram; + } + } + + if ((r = onenand_scan(&c->mtd, 1)) < 0) + goto err_release_dma; + + switch ((c->onenand.version_id >> 4) & 0xf) { + case 0: + c->freq = 40; + break; + case 1: + c->freq = 54; + break; + case 2: + c->freq = 66; + break; + case 3: + c->freq = 83; + break; + } + +#ifdef CONFIG_MTD_PARTITIONS + if (pdata->parts != NULL) + r = add_mtd_partitions(&c->mtd, pdata->parts, + pdata->nr_parts); + else +#endif + r = add_mtd_device(&c->mtd); + if (r < 0) + goto err_release_onenand; + + platform_set_drvdata(pdev, c); + + return 0; + +err_release_onenand: + onenand_release(&c->mtd); +err_release_dma: + if (c->dma_channel != -1) + omap_free_dma(c->dma_channel); + if (c->gpio_irq) + free_irq(OMAP_GPIO_IRQ(c->gpio_irq), c); +err_release_gpio: + if (c->gpio_irq) + omap_free_gpio(c->gpio_irq); +err_iounmap: + iounmap(c->onenand.base); +err_release_mem_region: + release_mem_region(c->phys_base, ONENAND_IO_SIZE); +err_free_cs: + gpmc_cs_free(c->gpmc_cs); +err_kfree: + kfree(c); + + return r; +} + +static int __devexit omap2_onenand_remove(struct platform_device *pdev) +{ + struct omap2_onenand *c = dev_get_drvdata(&pdev->dev); + + BUG_ON(c == NULL); + +#ifdef CONFIG_MTD_PARTITIONS + if (c->parts) + del_mtd_partitions(&c->mtd); + else + del_mtd_device(&c->mtd); +#else + del_mtd_device(&c->mtd); +#endif + + onenand_release(&c->mtd); + if (c->dma_channel != -1) + omap_free_dma(c->dma_channel); + omap2_onenand_shutdown(pdev); + platform_set_drvdata(pdev, NULL); + if (c->gpio_irq) { + free_irq(OMAP_GPIO_IRQ(c->gpio_irq), c); + omap_free_gpio(c->gpio_irq); + } + iounmap(c->onenand.base); + release_mem_region(c->phys_base, ONENAND_IO_SIZE); + kfree(c); + + return 0; +} + +static struct platform_driver omap2_onenand_driver = { + .probe = omap2_onenand_probe, + .remove = omap2_onenand_remove, + .shutdown = omap2_onenand_shutdown, + .driver = { + .name = DRIVER_NAME, + .owner = THIS_MODULE, + }, +}; + +static int __init omap2_onenand_init(void) +{ + printk(KERN_INFO "OneNAND driver initializing\n"); + return platform_driver_register(&omap2_onenand_driver); +} + +static void __exit omap2_onenand_exit(void) +{ + platform_driver_unregister(&omap2_onenand_driver); +} + +module_init(omap2_onenand_init); +module_exit(omap2_onenand_exit); + +MODULE_ALIAS(DRIVER_NAME); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jarkko Lavinen "); +MODULE_DESCRIPTION("Glue layer for OneNAND flash on OMAP2 / OMAP3"); diff --git a/include/asm-arm/arch-omap/onenand.h b/include/asm-arm/arch-omap/onenand.h index 6c959d0ce470..e30237117b69 100644 --- a/include/asm-arm/arch-omap/onenand.h +++ b/include/asm-arm/arch-omap/onenand.h @@ -16,6 +16,11 @@ struct omap_onenand_platform_data { int gpio_irq; struct mtd_partition *parts; int nr_parts; - int (*onenand_setup)(void __iomem *); + int (*onenand_setup)(void __iomem *, int freq); int dma_channel; }; + +int omap2_onenand_rephase(void); + +#define ONENAND_MAX_PARTITIONS 8 + -- cgit v1.2.3 From 17c1d2be28e485c0c8b09661db39d5bf2605069d Mon Sep 17 00:00:00 2001 From: Alexey Korolev Date: Wed, 20 Aug 2008 22:32:08 +0100 Subject: [MTD] [NAND] Fix missing kernel-doc [Reported by Randy Dunlap] Signed-off-by: Alexey Korolev Signed-off-by: David Woodhouse --- drivers/mtd/nand/nand_base.c | 6 +++--- drivers/mtd/nand/nand_ecc.c | 6 +++--- include/linux/mtd/nand.h | 1 + 3 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 582280560c89..d303db39c48d 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -801,9 +801,9 @@ static int nand_read_page_swecc(struct mtd_info *mtd, struct nand_chip *chip, * nand_read_subpage - [REPLACABLE] software ecc based sub-page read function * @mtd: mtd info structure * @chip: nand chip info structure - * @dataofs offset of requested data within the page - * @readlen data length - * @buf: buffer to store read data + * @data_offs: offset of requested data within the page + * @readlen: data length + * @bufpoi: buffer to store read data */ static int nand_read_subpage(struct mtd_info *mtd, struct nand_chip *chip, uint32_t data_offs, uint32_t readlen, uint8_t *bufpoi) { diff --git a/drivers/mtd/nand/nand_ecc.c b/drivers/mtd/nand/nand_ecc.c index d99e569e999f..fd19787c9ce7 100644 --- a/drivers/mtd/nand/nand_ecc.c +++ b/drivers/mtd/nand/nand_ecc.c @@ -150,8 +150,8 @@ static const char addressbits[256] = { /** * nand_calculate_ecc - [NAND Interface] Calculate 3-byte ECC for 256-byte block * @mtd: MTD block structure (unused) - * @dat: raw data - * @ecc_code: buffer for ECC + * @buf: input buffer with raw data + * @code: output buffer with ECC */ int nand_calculate_ecc(struct mtd_info *mtd, const unsigned char *buf, unsigned char *code) @@ -390,7 +390,7 @@ EXPORT_SYMBOL(nand_calculate_ecc); /** * nand_correct_data - [NAND Interface] Detect and correct bit error(s) * @mtd: MTD block structure (unused) - * @dat: raw data read from the chip + * @buf: raw data read from the chip * @read_ecc: ECC from the chip * @calc_ecc: the ECC calculated from raw data * diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 81774e5facf4..733d3f3b4eb8 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -248,6 +248,7 @@ struct nand_hw_control { * @read_page_raw: function to read a raw page without ECC * @write_page_raw: function to write a raw page without ECC * @read_page: function to read a page according to the ecc generator requirements + * @read_subpage: function to read parts of the page covered by ECC. * @write_page: function to write a page according to the ecc generator requirements * @read_oob: function to read chip OOB data * @write_oob: function to write chip OOB data -- cgit v1.2.3 From 1aa5dfb751d275ae7117d3b73ac423b4a46f2a73 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 20 Aug 2008 16:37:28 -0700 Subject: clocksource: keep track of original clocksource frequency The clocksource frequency is represented by clocksource->mult/2^(clocksource->shift). Currently, when NTP makes adjustments to the clock frequency, they are made directly to the mult value. This has the drawback that once changed, we cannot know what the orignal mult value was, or how much adjustment has been applied. This property causes problems in calculating proper ntp intervals when switching back and forth between clocksources. This patch separates the current mult value into a mult and mult_orig pair. The mult_orig value stays constant, while the ntp clocksource adjustments are done only to the mult value. This allows for correct ntp interval calculation and additionally lays the groundwork for a new notion of time, what I'm calling the monotonic-raw time, which is introduced in a following patch. Signed-off-by: John Stultz Signed-off-by: Roman Zippel Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- include/linux/clocksource.h | 11 +++++++---- kernel/time/clocksource.c | 3 +++ kernel/time/jiffies.c | 1 + 3 files changed, 11 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 55e434feec99..f0a7fb984413 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -45,7 +45,8 @@ struct clocksource; * @read: returns a cycle value * @mask: bitmask for two's complement * subtraction of non 64 bit counters - * @mult: cycle to nanosecond multiplier + * @mult: cycle to nanosecond multiplier (adjusted by NTP) + * @mult_orig: cycle to nanosecond multiplier (unadjusted by NTP) * @shift: cycle to nanosecond divisor (power of two) * @flags: flags describing special properties * @vread: vsyscall based read @@ -63,6 +64,7 @@ struct clocksource { cycle_t (*read)(void); cycle_t mask; u32 mult; + u32 mult_orig; u32 shift; unsigned long flags; cycle_t (*vread)(void); @@ -201,16 +203,17 @@ static inline void clocksource_calculate_interval(struct clocksource *c, { u64 tmp; - /* XXX - All of this could use a whole lot of optimization */ + /* Do the ns -> cycle conversion first, using original mult */ tmp = length_nsec; tmp <<= c->shift; - tmp += c->mult/2; - do_div(tmp, c->mult); + tmp += c->mult_orig/2; + do_div(tmp, c->mult_orig); c->cycle_interval = (cycle_t)tmp; if (c->cycle_interval == 0) c->cycle_interval = 1; + /* Go back from cycles -> shifted ns, this time use ntp adjused mult */ c->xtime_interval = (u64)c->cycle_interval * c->mult; } diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 093d4acf993b..9ed2eec97526 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -325,6 +325,9 @@ int clocksource_register(struct clocksource *c) unsigned long flags; int ret; + /* save mult_orig on registration */ + c->mult_orig = c->mult; + spin_lock_irqsave(&clocksource_lock, flags); ret = clocksource_enqueue(c); if (!ret) diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index 4c256fdb8875..1ca99557e929 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -61,6 +61,7 @@ struct clocksource clocksource_jiffies = { .read = jiffies_read, .mask = 0xffffffff, /*32bits*/ .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */ + .mult_orig = NSEC_PER_JIFFY << JIFFIES_SHIFT, .shift = JIFFIES_SHIFT, }; -- cgit v1.2.3 From 2d42244ae71d6c7b0884b5664cf2eda30fb2ae68 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 20 Aug 2008 16:37:30 -0700 Subject: clocksource: introduce CLOCK_MONOTONIC_RAW In talking with Josip Loncaric, and his work on clock synchronization (see btime.sf.net), he mentioned that for really close synchronization, it is useful to have access to "hardware time", that is a notion of time that is not in any way adjusted by the clock slewing done to keep close time sync. Part of the issue is if we are using the kernel's ntp adjusted representation of time in order to measure how we should correct time, we can run into what Paul McKenney aptly described as "Painting a road using the lines we're painting as the guide". I had been thinking of a similar problem, and was trying to come up with a way to give users access to a purely hardware based time representation that avoided users having to know the underlying frequency and mask values needed to deal with the wide variety of possible underlying hardware counters. My solution is to introduce CLOCK_MONOTONIC_RAW. This exposes a nanosecond based time value, that increments starting at bootup and has no frequency adjustments made to it what so ever. The time is accessed from userspace via the posix_clock_gettime() syscall, passing CLOCK_MONOTONIC_RAW as the clock_id. Signed-off-by: John Stultz Signed-off-by: Roman Zippel Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- include/linux/clocksource.h | 3 +++ include/linux/time.h | 2 ++ kernel/posix-timers.c | 15 +++++++++++++++ kernel/time/timekeeping.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 64 insertions(+) (limited to 'include') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index f0a7fb984413..f88d32f8ff7c 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -79,6 +79,7 @@ struct clocksource { /* timekeeping specific data, ignore */ cycle_t cycle_interval; u64 xtime_interval; + u32 raw_interval; /* * Second part is written at each timer interrupt * Keep it in a different cache line to dirty no @@ -87,6 +88,7 @@ struct clocksource { cycle_t cycle_last ____cacheline_aligned_in_smp; u64 xtime_nsec; s64 error; + struct timespec raw_time; #ifdef CONFIG_CLOCKSOURCE_WATCHDOG /* Watchdog related data, used by the framework */ @@ -215,6 +217,7 @@ static inline void clocksource_calculate_interval(struct clocksource *c, /* Go back from cycles -> shifted ns, this time use ntp adjused mult */ c->xtime_interval = (u64)c->cycle_interval * c->mult; + c->raw_interval = ((u64)c->cycle_interval * c->mult_orig) >> c->shift; } diff --git a/include/linux/time.h b/include/linux/time.h index e15206a7e82e..205f974b9ebf 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -117,6 +117,7 @@ extern int do_setitimer(int which, struct itimerval *value, extern unsigned int alarm_setitimer(unsigned int seconds); extern int do_getitimer(int which, struct itimerval *value); extern void getnstimeofday(struct timespec *tv); +extern void getrawmonotonic(struct timespec *ts); extern void getboottime(struct timespec *ts); extern void monotonic_to_bootbased(struct timespec *ts); @@ -214,6 +215,7 @@ struct itimerval { #define CLOCK_MONOTONIC 1 #define CLOCK_PROCESS_CPUTIME_ID 2 #define CLOCK_THREAD_CPUTIME_ID 3 +#define CLOCK_MONOTONIC_RAW 4 /* * The IDs of various hardware clocks: diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index e36d5798cbff..d3c66b53dff6 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -222,6 +222,15 @@ static int posix_ktime_get_ts(clockid_t which_clock, struct timespec *tp) return 0; } +/* + * Get monotonic time for posix timers + */ +static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec *tp) +{ + getrawmonotonic(tp); + return 0; +} + /* * Initialize everything, well, just everything in Posix clocks/timers ;) */ @@ -235,9 +244,15 @@ static __init int init_posix_timers(void) .clock_get = posix_ktime_get_ts, .clock_set = do_posix_clock_nosettime, }; + struct k_clock clock_monotonic_raw = { + .clock_getres = hrtimer_get_res, + .clock_get = posix_get_monotonic_raw, + .clock_set = do_posix_clock_nosettime, + }; register_posix_clock(CLOCK_REALTIME, &clock_realtime); register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic); + register_posix_clock(CLOCK_MONOTONIC_RAW, &clock_monotonic_raw); posix_timers_cache = kmem_cache_create("posix_timers_cache", sizeof (struct k_itimer), 0, SLAB_PANIC, diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 83d3555a6998..5099c95b8aa2 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -75,6 +75,9 @@ static void clocksource_forward_now(void) nsec = cyc2ns(clock, cycle_delta); timespec_add_ns(&xtime, nsec); + + nsec = ((s64)cycle_delta * clock->mult_orig) >> clock->shift; + clock->raw_time.tv_nsec += nsec; } /** @@ -183,6 +186,8 @@ static void change_clocksource(void) clocksource_forward_now(); + new->raw_time = clock->raw_time; + clock = new; clock->cycle_last = 0; clock->cycle_last = clocksource_read(new); @@ -204,6 +209,39 @@ static inline void clocksource_forward_now(void) { } static inline void change_clocksource(void) { } #endif +/** + * getrawmonotonic - Returns the raw monotonic time in a timespec + * @ts: pointer to the timespec to be set + * + * Returns the raw monotonic time (completely un-modified by ntp) + */ +void getrawmonotonic(struct timespec *ts) +{ + unsigned long seq; + s64 nsecs; + cycle_t cycle_now, cycle_delta; + + do { + seq = read_seqbegin(&xtime_lock); + + /* read clocksource: */ + cycle_now = clocksource_read(clock); + + /* calculate the delta since the last update_wall_time: */ + cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; + + /* convert to nanoseconds: */ + nsecs = ((s64)cycle_delta * clock->mult_orig) >> clock->shift; + + *ts = clock->raw_time; + + } while (read_seqretry(&xtime_lock, seq)); + + timespec_add_ns(ts, nsecs); +} +EXPORT_SYMBOL(getrawmonotonic); + + /** * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres */ @@ -466,6 +504,12 @@ void update_wall_time(void) second_overflow(); } + clock->raw_time.tv_nsec += clock->raw_interval; + if (clock->raw_time.tv_nsec >= NSEC_PER_SEC) { + clock->raw_time.tv_nsec -= NSEC_PER_SEC; + clock->raw_time.tv_sec++; + } + /* accumulate error between NTP and clock interval */ clock->error += tick_length; clock->error -= clock->xtime_interval << (NTP_SCALE_SHIFT - clock->shift); -- cgit v1.2.3 From 916c7a855174e3b53d182b97a26b2e27a29726a1 Mon Sep 17 00:00:00 2001 From: Roman Zippel Date: Wed, 20 Aug 2008 16:46:08 -0700 Subject: ntp: fix ADJ_OFFSET_SS_READ bug and do_adjtimex() cleanup Thanks to the review by Michael Kerrisk a bug in the recent ADJ_OFFSET_SS_READ option was discovered, where the ntp time_offset was inadvertently set by it. This fixes this by making the adjtime code more separate from the ntp_adjtime code (both of which really want to be separate syscalls). Signed-off-by: Roman Zippel Signed-off-by: Andrew Morton Acked-by: John Stultz Signed-off-by: Ingo Molnar --- include/linux/timex.h | 9 +++++- kernel/time/ntp.c | 76 +++++++++++++++++++++++++++------------------------ 2 files changed, 48 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/include/linux/timex.h b/include/linux/timex.h index fc6035d29d56..c00bcdd3ae42 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -141,8 +141,15 @@ struct timex { #define ADJ_MICRO 0x1000 /* select microsecond resolution */ #define ADJ_NANO 0x2000 /* select nanosecond resolution */ #define ADJ_TICK 0x4000 /* tick value */ + +#ifdef __KERNEL__ +#define ADJ_ADJTIME 0x8000 /* switch between adjtime/adjtimex modes */ +#define ADJ_OFFSET_SINGLESHOT 0x0001 /* old-fashioned adjtime */ +#define ADJ_OFFSET_READONLY 0x2000 /* read-only adjtime */ +#else #define ADJ_OFFSET_SINGLESHOT 0x8001 /* old-fashioned adjtime */ -#define ADJ_OFFSET_SS_READ 0xa001 /* read-only adjtime */ +#define ADJ_OFFSET_SS_READ 0xa001 /* read-only adjtime */ +#endif /* xntp 3.4 compatibility names */ #define MOD_OFFSET ADJ_OFFSET diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 5125ddd8196b..c6921aa1a42a 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -277,38 +277,50 @@ static inline void notify_cmos_timer(void) { } int do_adjtimex(struct timex *txc) { struct timespec ts; - long save_adjust, sec; int result; - /* In order to modify anything, you gotta be super-user! */ - if (txc->modes && !capable(CAP_SYS_TIME)) - return -EPERM; - - /* Now we validate the data before disabling interrupts */ - - if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT) { + /* Validate the data before disabling interrupts */ + if (txc->modes & ADJ_ADJTIME) { /* singleshot must not be used with any other mode bits */ - if (txc->modes & ~ADJ_OFFSET_SS_READ) + if (!(txc->modes & ADJ_OFFSET_SINGLESHOT)) return -EINVAL; + if (!(txc->modes & ADJ_OFFSET_READONLY) && + !capable(CAP_SYS_TIME)) + return -EPERM; + } else { + /* In order to modify anything, you gotta be super-user! */ + if (txc->modes && !capable(CAP_SYS_TIME)) + return -EPERM; + + /* if the quartz is off by more than 10% something is VERY wrong! */ + if (txc->modes & ADJ_TICK && + (txc->tick < 900000/USER_HZ || + txc->tick > 1100000/USER_HZ)) + return -EINVAL; + + if (txc->modes & ADJ_STATUS && time_state != TIME_OK) + hrtimer_cancel(&leap_timer); } - /* if the quartz is off by more than 10% something is VERY wrong ! */ - if (txc->modes & ADJ_TICK) - if (txc->tick < 900000/USER_HZ || - txc->tick > 1100000/USER_HZ) - return -EINVAL; - - if (time_state != TIME_OK && txc->modes & ADJ_STATUS) - hrtimer_cancel(&leap_timer); getnstimeofday(&ts); write_seqlock_irq(&xtime_lock); - /* Save for later - semantics of adjtime is to return old value */ - save_adjust = time_adjust; - /* If there are input parameters, then process them */ + if (txc->modes & ADJ_ADJTIME) { + long save_adjust = time_adjust; + + if (!(txc->modes & ADJ_OFFSET_READONLY)) { + /* adjtime() is independent from ntp_adjtime() */ + time_adjust = txc->offset; + ntp_update_frequency(); + } + txc->offset = save_adjust; + goto adj_done; + } if (txc->modes) { + long sec; + if (txc->modes & ADJ_STATUS) { if ((time_status & STA_PLL) && !(txc->status & STA_PLL)) { @@ -375,13 +387,8 @@ int do_adjtimex(struct timex *txc) if (txc->modes & ADJ_TAI && txc->constant > 0) time_tai = txc->constant; - if (txc->modes & ADJ_OFFSET) { - if (txc->modes == ADJ_OFFSET_SINGLESHOT) - /* adjtime() is independent from ntp_adjtime() */ - time_adjust = txc->offset; - else - ntp_update_offset(txc->offset); - } + if (txc->modes & ADJ_OFFSET) + ntp_update_offset(txc->offset); if (txc->modes & ADJ_TICK) tick_usec = txc->tick; @@ -389,19 +396,16 @@ int do_adjtimex(struct timex *txc) ntp_update_frequency(); } + txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ, + NTP_SCALE_SHIFT); + if (!(time_status & STA_NANO)) + txc->offset /= NSEC_PER_USEC; + +adj_done: result = time_state; /* mostly `TIME_OK' */ if (time_status & (STA_UNSYNC|STA_CLOCKERR)) result = TIME_ERROR; - if ((txc->modes == ADJ_OFFSET_SINGLESHOT) || - (txc->modes == ADJ_OFFSET_SS_READ)) - txc->offset = save_adjust; - else { - txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ, - NTP_SCALE_SHIFT); - if (!(time_status & STA_NANO)) - txc->offset /= NSEC_PER_USEC; - } txc->freq = shift_right((s32)(time_freq >> PPM_SCALE_INV_SHIFT) * (s64)PPM_SCALE_INV, NTP_SCALE_SHIFT); -- cgit v1.2.3 From 942ed161944b3476639916cf544e6975b29c985a Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Tue, 26 Aug 2008 21:09:59 +0100 Subject: power_supply: Add function to return system-wide power state Certain drivers benefit from knowing whether the system is on ac or battery, for instance when determining which backlight registers to read. This adds a simple call to determine whether there's an online power supply other than any batteries. Signed-off-by: Matthew Garrett Signed-off-by: Anton Vorontsov --- drivers/power/power_supply_core.c | 25 +++++++++++++++++++++++++ include/linux/power_supply.h | 6 ++++++ 2 files changed, 31 insertions(+) (limited to 'include') diff --git a/drivers/power/power_supply_core.c b/drivers/power/power_supply_core.c index cb1ccb472921..f44f5b608f6a 100644 --- a/drivers/power/power_supply_core.c +++ b/drivers/power/power_supply_core.c @@ -87,6 +87,30 @@ int power_supply_am_i_supplied(struct power_supply *psy) return error; } +static int __power_supply_is_system_supplied(struct device *dev, void *data) +{ + union power_supply_propval ret = {0,}; + struct power_supply *psy = dev_get_drvdata(dev); + + if (psy->type != POWER_SUPPLY_TYPE_BATTERY) { + if (psy->get_property(psy, POWER_SUPPLY_PROP_ONLINE, &ret)) + return 0; + if (ret.intval) + return ret.intval; + } + return 0; +} + +int power_supply_is_system_supplied(void) +{ + int error; + + error = class_for_each_device(power_supply_class, NULL, NULL, + __power_supply_is_system_supplied); + + return error; +} + int power_supply_register(struct device *parent, struct power_supply *psy) { int rc = 0; @@ -148,6 +172,7 @@ static void __exit power_supply_class_exit(void) EXPORT_SYMBOL_GPL(power_supply_changed); EXPORT_SYMBOL_GPL(power_supply_am_i_supplied); +EXPORT_SYMBOL_GPL(power_supply_is_system_supplied); EXPORT_SYMBOL_GPL(power_supply_register); EXPORT_SYMBOL_GPL(power_supply_unregister); diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index ea96ead1d39d..f9348cba6dc1 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -165,6 +165,12 @@ struct power_supply_info { extern void power_supply_changed(struct power_supply *psy); extern int power_supply_am_i_supplied(struct power_supply *psy); +#if defined(CONFIG_POWER_SUPPLY) || defined(CONFIG_POWER_SUPPLY_MODULE) +extern int power_supply_is_system_supplied(void); +#else +static inline int power_supply_is_system_supplied(void) { return -ENOSYS; } +#endif + extern int power_supply_register(struct device *parent, struct power_supply *psy); extern void power_supply_unregister(struct power_supply *psy); -- cgit v1.2.3 From 7bb67439bf6bd3782f07f1d7be1e63406453d5de Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 31 Aug 2008 08:05:58 -0700 Subject: select: Introduce a hrtimeout function This patch adds a schedule_hrtimeout() function, to be used by select() and poll() in a later patch. This function works similar to schedule_timeout() in most ways, but takes a timespec rather than jiffies. With a lot of contributions/fixes from Thomas Signed-off-by: Arjan van de Ven Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 2 ++ kernel/hrtimer.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 6d93dce61cbb..becd17db1a1a 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -346,6 +346,8 @@ extern long hrtimer_nanosleep_restart(struct restart_block *restart_block); extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *tsk); +extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode); + /* Soft interrupt function to run the hrtimer queues: */ extern void hrtimer_run_queues(void); extern void hrtimer_run_pending(void); diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index b8e4dce80a74..782137dc755f 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1678,3 +1678,68 @@ void __init hrtimers_init(void) #endif } +/** + * schedule_hrtimeout - sleep until timeout + * @expires: timeout value (ktime_t) + * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL + * + * Make the current task sleep until the given expiry time has + * elapsed. The routine will return immediately unless + * the current task state has been set (see set_current_state()). + * + * You can set the task state as follows - + * + * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to + * pass before the routine returns. + * + * %TASK_INTERRUPTIBLE - the routine may return early if a signal is + * delivered to the current task. + * + * The current task state is guaranteed to be TASK_RUNNING when this + * routine returns. + * + * Returns 0 when the timer has expired otherwise -EINTR + */ +int __sched schedule_hrtimeout(ktime_t *expires, + const enum hrtimer_mode mode) +{ + struct hrtimer_sleeper t; + + /* + * Optimize when a zero timeout value is given. It does not + * matter whether this is an absolute or a relative time. + */ + if (expires && !expires->tv64) { + __set_current_state(TASK_RUNNING); + return 0; + } + + /* + * A NULL parameter means "inifinte" + */ + if (!expires) { + schedule(); + __set_current_state(TASK_RUNNING); + return -EINTR; + } + + hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, mode); + t.timer.expires = *expires; + + hrtimer_init_sleeper(&t, current); + + hrtimer_start(&t.timer, t.timer.expires, mode); + if (!hrtimer_active(&t.timer)) + t.task = NULL; + + if (likely(t.task)) + schedule(); + + hrtimer_cancel(&t.timer); + destroy_hrtimer_on_stack(&t.timer); + + __set_current_state(TASK_RUNNING); + + return !t.task ? 0 : -EINTR; +} +EXPORT_SYMBOL_GPL(schedule_hrtimeout); -- cgit v1.2.3 From df0cc0539b4127bd02f64de2c335b4af1fdb3845 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 31 Aug 2008 08:09:53 -0700 Subject: select: add a timespec_add_safe() function For the select() rework, it's important to be able to add timespec structures in an overflow-safe manner. This patch adds a timespec_add_safe() function for this which is similar in operation to ktime_add_safe(), but works on a struct timespec. Signed-off-by: Thomas Gleixner Signed-off-by: Arjan van de Ven --- include/linux/time.h | 4 ++++ kernel/time.c | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+) (limited to 'include') diff --git a/include/linux/time.h b/include/linux/time.h index e15206a7e82e..726976478480 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -38,6 +38,8 @@ struct timezone { #define NSEC_PER_SEC 1000000000L #define FSEC_PER_SEC 1000000000000000L +#define TIME_T_MAX (time_t)((1UL << ((sizeof(time_t) << 3) - 1)) - 1) + static inline int timespec_equal(const struct timespec *a, const struct timespec *b) { @@ -72,6 +74,8 @@ extern unsigned long mktime(const unsigned int year, const unsigned int mon, const unsigned int min, const unsigned int sec); extern void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec); +extern struct timespec timespec_add_safe(const struct timespec lhs, + const struct timespec rhs); /* * sub = lhs - rhs, in normalized form diff --git a/kernel/time.c b/kernel/time.c index 6a08660b4fac..d63a4336fad6 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -669,3 +669,21 @@ EXPORT_SYMBOL(get_jiffies_64); #endif EXPORT_SYMBOL(jiffies); + +/* + * Add two timespec values and do a safety check for overflow. + * It's assumed that both values are valid (>= 0) + */ +struct timespec timespec_add_safe(const struct timespec lhs, + const struct timespec rhs) +{ + struct timespec res; + + set_normalized_timespec(&res, lhs.tv_sec + rhs.tv_sec, + lhs.tv_nsec + rhs.tv_nsec); + + if (res.tv_sec < lhs.tv_sec || res.tv_sec < rhs.tv_sec) + res.tv_sec = TIME_T_MAX; + + return res; +} -- cgit v1.2.3 From b773ad40aca5bd755ba886620842f16e8fef6d75 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 31 Aug 2008 08:16:57 -0700 Subject: select: add poll_select_set_timeout() and poll_select_copy_remaining() helpers This patch adds 2 helpers that will be used for the hrtimer based select/poll: poll_select_set_timeout() is a helper that takes a timeout (as a second, nanosecond pair) and turns that into a "struct timespec" that represents the absolute end time. This is a common operation in the many select() and poll() variants and needs various, common, sanity checks. poll_select_copy_remaining() is a helper that takes care of copying the remaining time to userspace, as select(), pselect() and ppoll() do. This function comes in both a natural and a compat implementation (due to datastructure differences). Signed-off-by: Thomas Gleixner Signed-off-by: Arjan van de Ven --- fs/compat.c | 51 +++++++++++++++++++++++++++++++++++ fs/select.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/poll.h | 2 ++ 3 files changed, 128 insertions(+) (limited to 'include') diff --git a/fs/compat.c b/fs/compat.c index 075d0509970d..424767c954a0 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1436,6 +1436,57 @@ out_ret: #define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t)) +static int poll_select_copy_remaining(struct timespec *end_time, void __user *p, + int timeval, int ret) +{ + struct timespec ts; + + if (!p) + return ret; + + if (current->personality & STICKY_TIMEOUTS) + goto sticky; + + /* No update for zero timeout */ + if (!end_time->tv_sec && !end_time->tv_nsec) + return ret; + + ktime_get_ts(&ts); + ts = timespec_sub(*end_time, ts); + if (ts.tv_sec < 0) + ts.tv_sec = ts.tv_nsec = 0; + + if (timeval) { + struct compat_timeval rtv; + + rtv.tv_sec = ts.tv_sec; + rtv.tv_usec = ts.tv_nsec / NSEC_PER_USEC; + + if (!copy_to_user(p, &rtv, sizeof(rtv))) + return ret; + } else { + struct compat_timespec rts; + + rts.tv_sec = ts.tv_sec; + rts.tv_nsec = ts.tv_nsec; + + if (!copy_to_user(p, &rts, sizeof(rts))) + return ret; + } + /* + * If an application puts its timeval in read-only memory, we + * don't want the Linux-specific update to the timeval to + * cause a fault after the select has completed + * successfully. However, because we're not updating the + * timeval, we can't restart the system call. + */ + +sticky: + if (ret == -ERESTARTNOHAND) + ret = -EINTR; + return ret; +} + /* * Ooo, nasty. We need here to frob 32-bit unsigned longs to * 64-bit unsigned longs. diff --git a/fs/select.c b/fs/select.c index da0e88201c3a..1180a6207789 100644 --- a/fs/select.c +++ b/fs/select.c @@ -130,6 +130,81 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, add_wait_queue(wait_address, &entry->wait); } +/** + * poll_select_set_timeout - helper function to setup the timeout value + * @to: pointer to timespec variable for the final timeout + * @sec: seconds (from user space) + * @nsec: nanoseconds (from user space) + * + * Note, we do not use a timespec for the user space value here, That + * way we can use the function for timeval and compat interfaces as well. + * + * Returns -EINVAL if sec/nsec are not normalized. Otherwise 0. + */ +int poll_select_set_timeout(struct timespec *to, long sec, long nsec) +{ + struct timespec ts = {.tv_sec = sec, .tv_nsec = nsec}; + + if (!timespec_valid(&ts)) + return -EINVAL; + + /* Optimize for the zero timeout value here */ + if (!sec && !nsec) { + to->tv_sec = to->tv_nsec = 0; + } else { + ktime_get_ts(to); + *to = timespec_add_safe(*to, ts); + } + return 0; +} + +static int poll_select_copy_remaining(struct timespec *end_time, void __user *p, + int timeval, int ret) +{ + struct timespec rts; + struct timeval rtv; + + if (!p) + return ret; + + if (current->personality & STICKY_TIMEOUTS) + goto sticky; + + /* No update for zero timeout */ + if (!end_time->tv_sec && !end_time->tv_nsec) + return ret; + + ktime_get_ts(&rts); + rts = timespec_sub(*end_time, rts); + if (rts.tv_sec < 0) + rts.tv_sec = rts.tv_nsec = 0; + + if (timeval) { + rtv.tv_sec = rts.tv_sec; + rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC; + + if (!copy_to_user(p, &rtv, sizeof(rtv))) + return ret; + + } else if (!copy_to_user(p, &rts, sizeof(rts))) + return ret; + + /* + * If an application puts its timeval in read-only memory, we + * don't want the Linux-specific update to the timeval to + * cause a fault after the select has completed + * successfully. However, because we're not updating the + * timeval, we can't restart the system call. + */ + +sticky: + if (ret == -ERESTARTNOHAND) + ret = -EINTR; + return ret; +} + + + #define FDS_IN(fds, n) (fds->in + n) #define FDS_OUT(fds, n) (fds->out + n) #define FDS_EX(fds, n) (fds->ex + n) diff --git a/include/linux/poll.h b/include/linux/poll.h index ef453828877a..f65de5128a9e 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -120,6 +120,8 @@ extern int do_sys_poll(struct pollfd __user * ufds, unsigned int nfds, extern int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s64 *timeout); +extern int poll_select_set_timeout(struct timespec *to, long sec, long nsec); + #endif /* KERNEL */ #endif /* _LINUX_POLL_H */ -- cgit v1.2.3 From be5dad20a55e054a35dac7f6f5f184dc72b379b4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 31 Aug 2008 08:19:15 -0700 Subject: select: add a poll specific struct to the restart_block union with hrtimer poll/select, the signal restart data no longer is a single long representing a jiffies count, but it becomes a second/nanosecond pair that also needs to encode if there was a timeout at all or not. This patch adds a struct to the restart_block union for this purpose Signed-off-by: Thomas Gleixner Signed-off-by: Arjan van de Ven --- include/linux/thread_info.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 38a56477f27a..e6b820f8b56b 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -38,6 +38,14 @@ struct restart_block { #endif u64 expires; } nanosleep; + /* For poll */ + struct { + struct pollfd __user *ufds; + int nfds; + int has_timeout; + unsigned long tv_sec; + unsigned long tv_nsec; + } poll; }; }; -- cgit v1.2.3 From 8ff3e8e85fa6c312051134b3953e397feb639f51 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 31 Aug 2008 08:26:40 -0700 Subject: select: switch select() and poll() over to hrtimers With lots of help, input and cleanups from Thomas Gleixner This patch switches select() and poll() over to hrtimers. The core of the patch is replacing the "s64 timeout" with a "struct timespec end_time" in all the plumbing. But most of the diffstat comes from using the just introduced helpers: poll_select_set_timeout poll_select_copy_remaining timespec_add_safe which make manipulating the timespec easier and less error-prone. Signed-off-by: Arjan van de Ven Signed-off-by: Thomas Gleixner --- fs/compat.c | 136 ++++---------------------- fs/select.c | 263 +++++++++++++++++---------------------------------- include/linux/poll.h | 6 +- 3 files changed, 111 insertions(+), 294 deletions(-) (limited to 'include') diff --git a/fs/compat.c b/fs/compat.c index 424767c954a0..133ed7f5d681 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1568,7 +1568,8 @@ int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset, ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) int compat_core_sys_select(int n, compat_ulong_t __user *inp, - compat_ulong_t __user *outp, compat_ulong_t __user *exp, s64 *timeout) + compat_ulong_t __user *outp, compat_ulong_t __user *exp, + struct timespec *end_time) { fd_set_bits fds; void *bits; @@ -1615,7 +1616,7 @@ int compat_core_sys_select(int n, compat_ulong_t __user *inp, zero_fd_set(n, fds.res_out); zero_fd_set(n, fds.res_ex); - ret = do_select(n, &fds, timeout); + ret = do_select(n, &fds, end_time); if (ret < 0) goto out; @@ -1641,7 +1642,7 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp, compat_ulong_t __user *exp, struct compat_timeval __user *tvp) { - s64 timeout = -1; + struct timespec end_time, *to = NULL; struct compat_timeval tv; int ret; @@ -1649,43 +1650,14 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, if (copy_from_user(&tv, tvp, sizeof(tv))) return -EFAULT; - if (tv.tv_sec < 0 || tv.tv_usec < 0) + to = &end_time; + if (poll_select_set_timeout(to, tv.tv_sec, + tv.tv_usec * NSEC_PER_USEC)) return -EINVAL; - - /* Cast to u64 to make GCC stop complaining */ - if ((u64)tv.tv_sec >= (u64)MAX_INT64_SECONDS) - timeout = -1; /* infinite */ - else { - timeout = DIV_ROUND_UP(tv.tv_usec, 1000000/HZ); - timeout += tv.tv_sec * HZ; - } } - ret = compat_core_sys_select(n, inp, outp, exp, &timeout); - - if (tvp) { - struct compat_timeval rtv; - - if (current->personality & STICKY_TIMEOUTS) - goto sticky; - rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)); - rtv.tv_sec = timeout; - if (compat_timeval_compare(&rtv, &tv) >= 0) - rtv = tv; - if (copy_to_user(tvp, &rtv, sizeof(rtv))) { -sticky: - /* - * If an application puts its timeval in read-only - * memory, we don't want the Linux-specific update to - * the timeval to cause a fault after the select has - * completed successfully. However, because we're not - * updating the timeval, we can't restart the system - * call. - */ - if (ret == -ERESTARTNOHAND) - ret = -EINTR; - } - } + ret = compat_core_sys_select(n, inp, outp, exp, to); + ret = poll_select_copy_remaining(&end_time, tvp, 1, ret); return ret; } @@ -1698,15 +1670,16 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp, { compat_sigset_t ss32; sigset_t ksigmask, sigsaved; - s64 timeout = MAX_SCHEDULE_TIMEOUT; struct compat_timespec ts; + struct timespec end_time, *to = NULL; int ret; if (tsp) { if (copy_from_user(&ts, tsp, sizeof(ts))) return -EFAULT; - if (ts.tv_sec < 0 || ts.tv_nsec < 0) + to = &end_time; + if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) return -EINVAL; } @@ -1721,51 +1694,8 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp, sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); } - do { - if (tsp) { - if ((unsigned long)ts.tv_sec < MAX_SELECT_SECONDS) { - timeout = DIV_ROUND_UP(ts.tv_nsec, 1000000000/HZ); - timeout += ts.tv_sec * (unsigned long)HZ; - ts.tv_sec = 0; - ts.tv_nsec = 0; - } else { - ts.tv_sec -= MAX_SELECT_SECONDS; - timeout = MAX_SELECT_SECONDS * HZ; - } - } - - ret = compat_core_sys_select(n, inp, outp, exp, &timeout); - - } while (!ret && !timeout && tsp && (ts.tv_sec || ts.tv_nsec)); - - if (tsp) { - struct compat_timespec rts; - - if (current->personality & STICKY_TIMEOUTS) - goto sticky; - - rts.tv_sec = timeout / HZ; - rts.tv_nsec = (timeout % HZ) * (NSEC_PER_SEC/HZ); - if (rts.tv_nsec >= NSEC_PER_SEC) { - rts.tv_sec++; - rts.tv_nsec -= NSEC_PER_SEC; - } - if (compat_timespec_compare(&rts, &ts) >= 0) - rts = ts; - if (copy_to_user(tsp, &rts, sizeof(rts))) { -sticky: - /* - * If an application puts its timeval in read-only - * memory, we don't want the Linux-specific update to - * the timeval to cause a fault after the select has - * completed successfully. However, because we're not - * updating the timeval, we can't restart the system - * call. - */ - if (ret == -ERESTARTNOHAND) - ret = -EINTR; - } - } + ret = compat_core_sys_select(n, inp, outp, exp, to); + ret = poll_select_copy_remaining(&end_time, tsp, 0, ret); if (ret == -ERESTARTNOHAND) { /* @@ -1810,18 +1740,16 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, compat_sigset_t ss32; sigset_t ksigmask, sigsaved; struct compat_timespec ts; - s64 timeout = -1; + struct timespec end_time, *to = NULL; int ret; if (tsp) { if (copy_from_user(&ts, tsp, sizeof(ts))) return -EFAULT; - /* We assume that ts.tv_sec is always lower than - the number of seconds that can be expressed in - an s64. Otherwise the compiler bitches at us */ - timeout = DIV_ROUND_UP(ts.tv_nsec, 1000000000/HZ); - timeout += ts.tv_sec * HZ; + to = &end_time; + if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) + return -EINVAL; } if (sigmask) { @@ -1835,7 +1763,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); } - ret = do_sys_poll(ufds, nfds, &timeout); + ret = do_sys_poll(ufds, nfds, to); /* We can restart this syscall, usually */ if (ret == -EINTR) { @@ -1853,31 +1781,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, } else if (sigmask) sigprocmask(SIG_SETMASK, &sigsaved, NULL); - if (tsp && timeout >= 0) { - struct compat_timespec rts; - - if (current->personality & STICKY_TIMEOUTS) - goto sticky; - /* Yes, we know it's actually an s64, but it's also positive. */ - rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * - 1000; - rts.tv_sec = timeout; - if (compat_timespec_compare(&rts, &ts) >= 0) - rts = ts; - if (copy_to_user(tsp, &rts, sizeof(rts))) { -sticky: - /* - * If an application puts its timeval in read-only - * memory, we don't want the Linux-specific update to - * the timeval to cause a fault after the select has - * completed successfully. However, because we're not - * updating the timeval, we can't restart the system - * call. - */ - if (ret == -ERESTARTNOHAND && timeout >= 0) - ret = -EINTR; - } - } + ret = poll_select_copy_remaining(&end_time, tsp, 0, ret); return ret; } diff --git a/fs/select.c b/fs/select.c index 1180a6207789..f6dceb56793f 100644 --- a/fs/select.c +++ b/fs/select.c @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -203,8 +204,6 @@ sticky: return ret; } - - #define FDS_IN(fds, n) (fds->in + n) #define FDS_OUT(fds, n) (fds->out + n) #define FDS_EX(fds, n) (fds->ex + n) @@ -257,11 +256,12 @@ get_max: #define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR) #define POLLEX_SET (POLLPRI) -int do_select(int n, fd_set_bits *fds, s64 *timeout) +int do_select(int n, fd_set_bits *fds, struct timespec *end_time) { + ktime_t expire, *to = NULL; struct poll_wqueues table; poll_table *wait; - int retval, i; + int retval, i, timed_out = 0; rcu_read_lock(); retval = max_select_fd(n, fds); @@ -273,12 +273,14 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout) poll_initwait(&table); wait = &table.pt; - if (!*timeout) + if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { wait = NULL; + timed_out = 1; + } + retval = 0; for (;;) { unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp; - long __timeout; set_current_state(TASK_INTERRUPTIBLE); @@ -334,27 +336,25 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout) cond_resched(); } wait = NULL; - if (retval || !*timeout || signal_pending(current)) + if (retval || timed_out || signal_pending(current)) break; if (table.error) { retval = table.error; break; } - if (*timeout < 0) { - /* Wait indefinitely */ - __timeout = MAX_SCHEDULE_TIMEOUT; - } else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT - 1)) { - /* Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in a loop */ - __timeout = MAX_SCHEDULE_TIMEOUT - 1; - *timeout -= __timeout; - } else { - __timeout = *timeout; - *timeout = 0; + /* + * If this is the first loop and we have a timeout + * given, then we convert to ktime_t and set the to + * pointer to the expiry value. + */ + if (end_time && !to) { + expire = timespec_to_ktime(*end_time); + to = &expire; } - __timeout = schedule_timeout(__timeout); - if (*timeout >= 0) - *timeout += __timeout; + + if (!schedule_hrtimeout(to, HRTIMER_MODE_ABS)) + timed_out = 1; } __set_current_state(TASK_RUNNING); @@ -375,7 +375,7 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout) ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, - fd_set __user *exp, s64 *timeout) + fd_set __user *exp, struct timespec *end_time) { fd_set_bits fds; void *bits; @@ -426,7 +426,7 @@ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, zero_fd_set(n, fds.res_out); zero_fd_set(n, fds.res_ex); - ret = do_select(n, &fds, timeout); + ret = do_select(n, &fds, end_time); if (ret < 0) goto out; @@ -452,7 +452,7 @@ out_nofds: asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timeval __user *tvp) { - s64 timeout = -1; + struct timespec end_time, *to = NULL; struct timeval tv; int ret; @@ -460,43 +460,14 @@ asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp, if (copy_from_user(&tv, tvp, sizeof(tv))) return -EFAULT; - if (tv.tv_sec < 0 || tv.tv_usec < 0) + to = &end_time; + if (poll_select_set_timeout(to, tv.tv_sec, + tv.tv_usec * NSEC_PER_USEC)) return -EINVAL; - - /* Cast to u64 to make GCC stop complaining */ - if ((u64)tv.tv_sec >= (u64)MAX_INT64_SECONDS) - timeout = -1; /* infinite */ - else { - timeout = DIV_ROUND_UP(tv.tv_usec, USEC_PER_SEC/HZ); - timeout += tv.tv_sec * HZ; - } } - ret = core_sys_select(n, inp, outp, exp, &timeout); - - if (tvp) { - struct timeval rtv; - - if (current->personality & STICKY_TIMEOUTS) - goto sticky; - rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)); - rtv.tv_sec = timeout; - if (timeval_compare(&rtv, &tv) >= 0) - rtv = tv; - if (copy_to_user(tvp, &rtv, sizeof(rtv))) { -sticky: - /* - * If an application puts its timeval in read-only - * memory, we don't want the Linux-specific update to - * the timeval to cause a fault after the select has - * completed successfully. However, because we're not - * updating the timeval, we can't restart the system - * call. - */ - if (ret == -ERESTARTNOHAND) - ret = -EINTR; - } - } + ret = core_sys_select(n, inp, outp, exp, to); + ret = poll_select_copy_remaining(&end_time, tvp, 1, ret); return ret; } @@ -506,25 +477,17 @@ asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timespec __user *tsp, const sigset_t __user *sigmask, size_t sigsetsize) { - s64 timeout = MAX_SCHEDULE_TIMEOUT; sigset_t ksigmask, sigsaved; - struct timespec ts; + struct timespec ts, end_time, *to = NULL; int ret; if (tsp) { if (copy_from_user(&ts, tsp, sizeof(ts))) return -EFAULT; - if (ts.tv_sec < 0 || ts.tv_nsec < 0) + to = &end_time; + if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) return -EINVAL; - - /* Cast to u64 to make GCC stop complaining */ - if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS) - timeout = -1; /* infinite */ - else { - timeout = DIV_ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ); - timeout += ts.tv_sec * HZ; - } } if (sigmask) { @@ -538,32 +501,8 @@ asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp, sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); } - ret = core_sys_select(n, inp, outp, exp, &timeout); - - if (tsp) { - struct timespec rts; - - if (current->personality & STICKY_TIMEOUTS) - goto sticky; - rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * - 1000; - rts.tv_sec = timeout; - if (timespec_compare(&rts, &ts) >= 0) - rts = ts; - if (copy_to_user(tsp, &rts, sizeof(rts))) { -sticky: - /* - * If an application puts its timeval in read-only - * memory, we don't want the Linux-specific update to - * the timeval to cause a fault after the select has - * completed successfully. However, because we're not - * updating the timeval, we can't restart the system - * call. - */ - if (ret == -ERESTARTNOHAND) - ret = -EINTR; - } - } + ret = core_sys_select(n, inp, outp, exp, &end_time); + ret = poll_select_copy_remaining(&end_time, tsp, 0, ret); if (ret == -ERESTARTNOHAND) { /* @@ -649,18 +588,20 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait) } static int do_poll(unsigned int nfds, struct poll_list *list, - struct poll_wqueues *wait, s64 *timeout) + struct poll_wqueues *wait, struct timespec *end_time) { - int count = 0; poll_table* pt = &wait->pt; + ktime_t expire, *to = NULL; + int timed_out = 0, count = 0; /* Optimise the no-wait case */ - if (!(*timeout)) + if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { pt = NULL; + timed_out = 1; + } for (;;) { struct poll_list *walk; - long __timeout; set_current_state(TASK_INTERRUPTIBLE); for (walk = list; walk != NULL; walk = walk->next) { @@ -692,27 +633,21 @@ static int do_poll(unsigned int nfds, struct poll_list *list, if (signal_pending(current)) count = -EINTR; } - if (count || !*timeout) + if (count || timed_out) break; - if (*timeout < 0) { - /* Wait indefinitely */ - __timeout = MAX_SCHEDULE_TIMEOUT; - } else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT-1)) { - /* - * Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in - * a loop - */ - __timeout = MAX_SCHEDULE_TIMEOUT - 1; - *timeout -= __timeout; - } else { - __timeout = *timeout; - *timeout = 0; + /* + * If this is the first loop and we have a timeout + * given, then we convert to ktime_t and set the to + * pointer to the expiry value. + */ + if (end_time && !to) { + expire = timespec_to_ktime(*end_time); + to = &expire; } - __timeout = schedule_timeout(__timeout); - if (*timeout >= 0) - *timeout += __timeout; + if (!schedule_hrtimeout(to, HRTIMER_MODE_ABS)) + timed_out = 1; } __set_current_state(TASK_RUNNING); return count; @@ -721,7 +656,8 @@ static int do_poll(unsigned int nfds, struct poll_list *list, #define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list)) / \ sizeof(struct pollfd)) -int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout) +int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, + struct timespec *end_time) { struct poll_wqueues table; int err = -EFAULT, fdcount, len, size; @@ -761,7 +697,7 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout) } poll_initwait(&table); - fdcount = do_poll(nfds, head, &table, timeout); + fdcount = do_poll(nfds, head, &table, end_time); poll_freewait(&table); for (walk = head; walk; walk = walk->next) { @@ -787,16 +723,21 @@ out_fds: static long do_restart_poll(struct restart_block *restart_block) { - struct pollfd __user *ufds = (struct pollfd __user*)restart_block->arg0; - int nfds = restart_block->arg1; - s64 timeout = ((s64)restart_block->arg3<<32) | (s64)restart_block->arg2; + struct pollfd __user *ufds = restart_block->poll.ufds; + int nfds = restart_block->poll.nfds; + struct timespec *to = NULL, end_time; int ret; - ret = do_sys_poll(ufds, nfds, &timeout); + if (restart_block->poll.has_timeout) { + end_time.tv_sec = restart_block->poll.tv_sec; + end_time.tv_nsec = restart_block->poll.tv_nsec; + to = &end_time; + } + + ret = do_sys_poll(ufds, nfds, to); + if (ret == -EINTR) { restart_block->fn = do_restart_poll; - restart_block->arg2 = timeout & 0xFFFFFFFF; - restart_block->arg3 = (u64)timeout >> 32; ret = -ERESTART_RESTARTBLOCK; } return ret; @@ -805,31 +746,32 @@ static long do_restart_poll(struct restart_block *restart_block) asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds, long timeout_msecs) { - s64 timeout_jiffies; + struct timespec end_time, *to = NULL; int ret; - if (timeout_msecs > 0) { -#if HZ > 1000 - /* We can only overflow if HZ > 1000 */ - if (timeout_msecs / 1000 > (s64)0x7fffffffffffffffULL / (s64)HZ) - timeout_jiffies = -1; - else -#endif - timeout_jiffies = msecs_to_jiffies(timeout_msecs) + 1; - } else { - /* Infinite (< 0) or no (0) timeout */ - timeout_jiffies = timeout_msecs; + if (timeout_msecs >= 0) { + to = &end_time; + poll_select_set_timeout(to, timeout_msecs / MSEC_PER_SEC, + NSEC_PER_MSEC * (timeout_msecs % MSEC_PER_SEC)); } - ret = do_sys_poll(ufds, nfds, &timeout_jiffies); + ret = do_sys_poll(ufds, nfds, to); + if (ret == -EINTR) { struct restart_block *restart_block; + restart_block = ¤t_thread_info()->restart_block; restart_block->fn = do_restart_poll; - restart_block->arg0 = (unsigned long)ufds; - restart_block->arg1 = nfds; - restart_block->arg2 = timeout_jiffies & 0xFFFFFFFF; - restart_block->arg3 = (u64)timeout_jiffies >> 32; + restart_block->poll.ufds = ufds; + restart_block->poll.nfds = nfds; + + if (timeout_msecs >= 0) { + restart_block->poll.tv_sec = end_time.tv_sec; + restart_block->poll.tv_nsec = end_time.tv_nsec; + restart_block->poll.has_timeout = 1; + } else + restart_block->poll.has_timeout = 0; + ret = -ERESTART_RESTARTBLOCK; } return ret; @@ -841,21 +783,16 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, size_t sigsetsize) { sigset_t ksigmask, sigsaved; - struct timespec ts; - s64 timeout = -1; + struct timespec ts, end_time, *to = NULL; int ret; if (tsp) { if (copy_from_user(&ts, tsp, sizeof(ts))) return -EFAULT; - /* Cast to u64 to make GCC stop complaining */ - if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS) - timeout = -1; /* infinite */ - else { - timeout = DIV_ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ); - timeout += ts.tv_sec * HZ; - } + to = &end_time; + if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) + return -EINVAL; } if (sigmask) { @@ -869,7 +806,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); } - ret = do_sys_poll(ufds, nfds, &timeout); + ret = do_sys_poll(ufds, nfds, to); /* We can restart this syscall, usually */ if (ret == -EINTR) { @@ -887,31 +824,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, } else if (sigmask) sigprocmask(SIG_SETMASK, &sigsaved, NULL); - if (tsp && timeout >= 0) { - struct timespec rts; - - if (current->personality & STICKY_TIMEOUTS) - goto sticky; - /* Yes, we know it's actually an s64, but it's also positive. */ - rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * - 1000; - rts.tv_sec = timeout; - if (timespec_compare(&rts, &ts) >= 0) - rts = ts; - if (copy_to_user(tsp, &rts, sizeof(rts))) { - sticky: - /* - * If an application puts its timeval in read-only - * memory, we don't want the Linux-specific update to - * the timeval to cause a fault after the select has - * completed successfully. However, because we're not - * updating the timeval, we can't restart the system - * call. - */ - if (ret == -ERESTARTNOHAND && timeout >= 0) - ret = -EINTR; - } - } + ret = poll_select_copy_remaining(&end_time, tsp, 0, ret); return ret; } diff --git a/include/linux/poll.h b/include/linux/poll.h index f65de5128a9e..badd98ab06f6 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -114,11 +114,11 @@ void zero_fd_set(unsigned long nr, unsigned long *fdset) #define MAX_INT64_SECONDS (((s64)(~((u64)0)>>1)/HZ)-1) -extern int do_select(int n, fd_set_bits *fds, s64 *timeout); +extern int do_select(int n, fd_set_bits *fds, struct timespec *end_time); extern int do_sys_poll(struct pollfd __user * ufds, unsigned int nfds, - s64 *timeout); + struct timespec *end_time); extern int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, - fd_set __user *exp, s64 *timeout); + fd_set __user *exp, struct timespec *end_time); extern int poll_select_set_timeout(struct timespec *to, long sec, long nsec); -- cgit v1.2.3 From 63ca243b271f5b44e0b1057003cf498b6d0fadf7 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 1 Sep 2008 14:35:02 -0700 Subject: hrtimer: add abstraction functions for accessing the "expires" member In order to be able to turn hrtimers into range based, we need to provide accessor functions for getting to the "expires" ktime_t member of the struct hrtimer. This patch adds a set of accessors for this purpose: * hrtimer_set_expires * hrtimer_set_expires_tv64 * hrtimer_add_expires * hrtimer_add_expires_ns * hrtimer_get_expires * hrtimer_get_expires_tv64 * hrtimer_get_expires_ns * hrtimer_expires_remaining * hrtimer_start_expires No users of these new accessors are added yet; these follow in later patches. Hopefully this patch can even go into 2.6.27-rc so that the conversions will not have a bottleneck in -next Signed-off-by: Arjan van de Ven --- include/linux/hrtimer.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index becd17db1a1a..9900e998ea8f 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -217,6 +217,45 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer) return timer->base->cpu_base->hres_active; } +static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) +{ + timer->expires = time; +} +static inline void hrtimer_set_expires_tv64(struct hrtimer *timer, s64 tv64) +{ + timer->expires.tv64 = tv64; +} + +static inline void hrtimer_add_expires(struct hrtimer *timer, ktime_t time) +{ + timer->expires = ktime_add_safe(timer->expires, time); +} + +static inline void hrtimer_add_expires_ns(struct hrtimer *timer, unsigned long ns) +{ + timer->expires = ktime_add_ns(timer->expires, ns); +} + +static inline ktime_t hrtimer_get_expires(const struct hrtimer *timer) +{ + return timer->expires; +} + +static inline s64 hrtimer_get_expires_tv64(const struct hrtimer *timer) +{ + return timer->expires.tv64; +} + +static inline s64 hrtimer_get_expires_ns(const struct hrtimer *timer) +{ + return ktime_to_ns(timer->expires); +} + +static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer) +{ + return ktime_sub(timer->expires, timer->base->get_time()); +} + /* * The resolution of the clocks. The resolution value is returned in * the clock_getres() system call to give application programmers an @@ -287,6 +326,12 @@ extern int hrtimer_start(struct hrtimer *timer, ktime_t tim, extern int hrtimer_cancel(struct hrtimer *timer); extern int hrtimer_try_to_cancel(struct hrtimer *timer); +static inline int hrtimer_start_expires(struct hrtimer *timer, + enum hrtimer_mode mode) +{ + return hrtimer_start(timer, hrtimer_get_expires(timer), mode); +} + static inline int hrtimer_restart(struct hrtimer *timer) { return hrtimer_start(timer, timer->expires, HRTIMER_MODE_ABS); -- cgit v1.2.3 From 799b64de256ea68fbb5db63bb55f61c305870643 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 1 Sep 2008 15:27:58 -0700 Subject: hrtimer: rename the "expires" struct member to avoid accidental usage To catch code that still touches the "expires" memory directly, rename it to have the compiler complain rather than get nasty, hard to explain, runtime behavior Signed-off-by: Arjan van de Ven --- include/linux/hrtimer.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 9900e998ea8f..485a634fd6e2 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -111,7 +111,7 @@ enum hrtimer_cb_mode { */ struct hrtimer { struct rb_node node; - ktime_t expires; + ktime_t _expires; enum hrtimer_restart (*function)(struct hrtimer *); struct hrtimer_clock_base *base; unsigned long state; @@ -219,41 +219,41 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer) static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) { - timer->expires = time; + timer->_expires = time; } static inline void hrtimer_set_expires_tv64(struct hrtimer *timer, s64 tv64) { - timer->expires.tv64 = tv64; + timer->_expires.tv64 = tv64; } static inline void hrtimer_add_expires(struct hrtimer *timer, ktime_t time) { - timer->expires = ktime_add_safe(timer->expires, time); + timer->_expires = ktime_add_safe(timer->_expires, time); } static inline void hrtimer_add_expires_ns(struct hrtimer *timer, unsigned long ns) { - timer->expires = ktime_add_ns(timer->expires, ns); + timer->_expires = ktime_add_ns(timer->_expires, ns); } static inline ktime_t hrtimer_get_expires(const struct hrtimer *timer) { - return timer->expires; + return timer->_expires; } static inline s64 hrtimer_get_expires_tv64(const struct hrtimer *timer) { - return timer->expires.tv64; + return timer->_expires.tv64; } static inline s64 hrtimer_get_expires_ns(const struct hrtimer *timer) { - return ktime_to_ns(timer->expires); + return ktime_to_ns(timer->_expires); } static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer) { - return ktime_sub(timer->expires, timer->base->get_time()); + return ktime_sub(timer->_expires, timer->base->get_time()); } /* @@ -334,7 +334,7 @@ static inline int hrtimer_start_expires(struct hrtimer *timer, static inline int hrtimer_restart(struct hrtimer *timer) { - return hrtimer_start(timer, timer->expires, HRTIMER_MODE_ABS); + return hrtimer_start(timer, timer->_expires, HRTIMER_MODE_ABS); } /* Query timers: */ -- cgit v1.2.3 From 654c8e0b1c623b156c5b92f28d914ab38c9c2c90 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 1 Sep 2008 15:47:08 -0700 Subject: hrtimer: turn hrtimers into range timers this patch turns hrtimers into range timers; they have 2 expire points 1) the soft expire point 2) the hard expire point the kernel will do it's regular best effort attempt to get the timer run at the hard expire point. However, if some other time fires after the soft expire point, the kernel now has the freedom to fire this timer at this point, and thus grouping the events and preventing a power-expensive wakeup in the future. Signed-off-by: Arjan van de Ven --- include/linux/hrtimer.h | 31 ++++++++++++++++++++++++++- kernel/hrtimer.c | 56 +++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 82 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 485a634fd6e2..28259c336679 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -112,6 +112,7 @@ enum hrtimer_cb_mode { struct hrtimer { struct rb_node node; ktime_t _expires; + ktime_t _softexpires; enum hrtimer_restart (*function)(struct hrtimer *); struct hrtimer_clock_base *base; unsigned long state; @@ -220,20 +221,37 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer) static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) { timer->_expires = time; + timer->_softexpires = time; } + +static inline void hrtimer_set_expires_range(struct hrtimer *timer, ktime_t time, ktime_t delta) +{ + timer->_softexpires = time; + timer->_expires = ktime_add_safe(time, delta); +} + +static inline void hrtimer_set_expires_range_ns(struct hrtimer *timer, ktime_t time, unsigned long delta) +{ + timer->_softexpires = time; + timer->_expires = ktime_add_safe(time, ns_to_ktime(delta)); +} + static inline void hrtimer_set_expires_tv64(struct hrtimer *timer, s64 tv64) { timer->_expires.tv64 = tv64; + timer->_softexpires.tv64 = tv64; } static inline void hrtimer_add_expires(struct hrtimer *timer, ktime_t time) { timer->_expires = ktime_add_safe(timer->_expires, time); + timer->_softexpires = ktime_add_safe(timer->_softexpires, time); } static inline void hrtimer_add_expires_ns(struct hrtimer *timer, unsigned long ns) { timer->_expires = ktime_add_ns(timer->_expires, ns); + timer->_softexpires = ktime_add_ns(timer->_softexpires, ns); } static inline ktime_t hrtimer_get_expires(const struct hrtimer *timer) @@ -241,10 +259,19 @@ static inline ktime_t hrtimer_get_expires(const struct hrtimer *timer) return timer->_expires; } +static inline ktime_t hrtimer_get_softexpires(const struct hrtimer *timer) +{ + return timer->_softexpires; +} + static inline s64 hrtimer_get_expires_tv64(const struct hrtimer *timer) { return timer->_expires.tv64; } +static inline s64 hrtimer_get_softexpires_tv64(const struct hrtimer *timer) +{ + return timer->_softexpires.tv64; +} static inline s64 hrtimer_get_expires_ns(const struct hrtimer *timer) { @@ -334,7 +361,7 @@ static inline int hrtimer_start_expires(struct hrtimer *timer, static inline int hrtimer_restart(struct hrtimer *timer) { - return hrtimer_start(timer, timer->_expires, HRTIMER_MODE_ABS); + return hrtimer_start_expires(timer, HRTIMER_MODE_ABS); } /* Query timers: */ @@ -391,6 +418,8 @@ extern long hrtimer_nanosleep_restart(struct restart_block *restart_block); extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *tsk); +extern int schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, + const enum hrtimer_mode mode); extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode); /* Soft interrupt function to run the hrtimer queues: */ diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index ae307feec74c..01483004183d 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1309,7 +1309,20 @@ void hrtimer_interrupt(struct clock_event_device *dev) timer = rb_entry(node, struct hrtimer, node); - if (basenow.tv64 < hrtimer_get_expires_tv64(timer)) { + /* + * The immediate goal for using the softexpires is + * minimizing wakeups, not running timers at the + * earliest interrupt after their soft expiration. + * This allows us to avoid using a Priority Search + * Tree, which can answer a stabbing querry for + * overlapping intervals and instead use the simple + * BST we already have. + * We don't add extra wakeups by delaying timers that + * are right-of a not yet expired timer, because that + * timer will have to trigger a wakeup anyway. + */ + + if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer)) { ktime_t expires; expires = ktime_sub(hrtimer_get_expires(timer), @@ -1681,14 +1694,20 @@ void __init hrtimers_init(void) } /** - * schedule_hrtimeout - sleep until timeout + * schedule_hrtimeout_range - sleep until timeout * @expires: timeout value (ktime_t) + * @delta: slack in expires timeout (ktime_t) * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL * * Make the current task sleep until the given expiry time has * elapsed. The routine will return immediately unless * the current task state has been set (see set_current_state()). * + * The @delta argument gives the kernel the freedom to schedule the + * actual wakeup to a time that is both power and performance friendly. + * The kernel give the normal best effort behavior for "@expires+@delta", + * but may decide to fire the timer earlier, but no earlier than @expires. + * * You can set the task state as follows - * * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to @@ -1702,7 +1721,7 @@ void __init hrtimers_init(void) * * Returns 0 when the timer has expired otherwise -EINTR */ -int __sched schedule_hrtimeout(ktime_t *expires, +int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, const enum hrtimer_mode mode) { struct hrtimer_sleeper t; @@ -1726,7 +1745,7 @@ int __sched schedule_hrtimeout(ktime_t *expires, } hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, mode); - hrtimer_set_expires(&t.timer, *expires); + hrtimer_set_expires_range_ns(&t.timer, *expires, delta); hrtimer_init_sleeper(&t, current); @@ -1744,4 +1763,33 @@ int __sched schedule_hrtimeout(ktime_t *expires, return !t.task ? 0 : -EINTR; } +EXPORT_SYMBOL_GPL(schedule_hrtimeout_range); + +/** + * schedule_hrtimeout - sleep until timeout + * @expires: timeout value (ktime_t) + * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL + * + * Make the current task sleep until the given expiry time has + * elapsed. The routine will return immediately unless + * the current task state has been set (see set_current_state()). + * + * You can set the task state as follows - + * + * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to + * pass before the routine returns. + * + * %TASK_INTERRUPTIBLE - the routine may return early if a signal is + * delivered to the current task. + * + * The current task state is guaranteed to be TASK_RUNNING when this + * routine returns. + * + * Returns 0 when the timer has expired otherwise -EINTR + */ +int __sched schedule_hrtimeout(ktime_t *expires, + const enum hrtimer_mode mode) +{ + return schedule_hrtimeout_range(expires, 0, mode); +} EXPORT_SYMBOL_GPL(schedule_hrtimeout); -- cgit v1.2.3 From 6976675d94042fbd446231d1bd8b7de71a980ada Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 1 Sep 2008 15:52:40 -0700 Subject: hrtimer: create a "timer_slack" field in the task struct We want to be able to control the default "rounding" that is used by select() and poll() and friends. This is a per process property (so that we can have a "nice" like program to start certain programs with a looser or stricter rounding) that can be set/get via a prctl(). For this purpose, a field called "timer_slack_ns" is added to the task struct. In addition, a field called "default_timer_slack"ns" is added so that tasks easily can temporarily to a more/less accurate slack and then back to the default. The default value of the slack is set to 50 usec; this is significantly less than 2.6.27's average select() and poll() timing error but still allows the kernel to group timers somewhat to preserve power behavior. Applications and admins can override this via the prctl() Signed-off-by: Arjan van de Ven --- include/linux/init_task.h | 1 + include/linux/prctl.h | 7 +++++++ include/linux/sched.h | 6 ++++++ kernel/fork.c | 2 ++ kernel/sys.c | 10 ++++++++++ 5 files changed, 26 insertions(+) (limited to 'include') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 021d8e720c79..23fd8909b9e5 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -170,6 +170,7 @@ extern struct group_info init_groups; .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ .fs_excl = ATOMIC_INIT(0), \ .pi_lock = __SPIN_LOCK_UNLOCKED(tsk.pi_lock), \ + .timer_slack_ns = 50000, /* 50 usec default slack */ \ .pids = { \ [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \ [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \ diff --git a/include/linux/prctl.h b/include/linux/prctl.h index 5ad79198d6f9..48d887e3c6e7 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -78,4 +78,11 @@ #define PR_GET_SECUREBITS 27 #define PR_SET_SECUREBITS 28 +/* + * Get/set the timerslack as used by poll/select/nanosleep + * A value of 0 means "use default" + */ +#define PR_SET_TIMERSLACK 29 +#define PR_GET_TIMERSLACK 30 + #endif /* _LINUX_PRCTL_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 3d9120c5ad15..dcc03fd5a7f3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1301,6 +1301,12 @@ struct task_struct { int latency_record_count; struct latency_record latency_record[LT_SAVECOUNT]; #endif + /* + * time slack values; these are used to round up poll() and + * select() etc timeout values. These are in nanoseconds. + */ + unsigned long timer_slack_ns; + unsigned long default_timer_slack_ns; }; /* diff --git a/kernel/fork.c b/kernel/fork.c index 7ce2ebe84796..4308d75f0fa5 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -987,6 +987,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->prev_utime = cputime_zero; p->prev_stime = cputime_zero; + p->default_timer_slack_ns = current->timer_slack_ns; + #ifdef CONFIG_DETECT_SOFTLOCKUP p->last_switch_count = 0; p->last_switch_timestamp = 0; diff --git a/kernel/sys.c b/kernel/sys.c index 038a7bc0901d..1b96401a0576 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1727,6 +1727,16 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, case PR_SET_TSC: error = SET_TSC_CTL(arg2); break; + case PR_GET_TIMERSLACK: + error = current->timer_slack_ns; + break; + case PR_SET_TIMERSLACK: + if (arg2 <= 0) + current->timer_slack_ns = + current->default_timer_slack_ns; + else + current->timer_slack_ns = arg2; + break; default: error = -EINVAL; break; -- cgit v1.2.3 From 584fb4a76413ec9215741e075e0dfb69173b213f Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 6 Sep 2008 08:32:57 -0700 Subject: hrtimer: fix build bug found by Ingo in some randconfig configurations, hrtimers are used even though the hrtimer config if off; and it broke the build due to some of the new functions being on the wrong side of the ifdef. This patch moves the functions to the other side of the ifdef, fixing the build bug. Signed-off-by: Arjan van de Ven --- include/linux/hrtimer.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 28259c336679..c407b33ef844 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -198,13 +198,6 @@ struct hrtimer_cpu_base { #endif }; -#ifdef CONFIG_HIGH_RES_TIMERS -struct clock_event_device; - -extern void clock_was_set(void); -extern void hres_timers_resume(void); -extern void hrtimer_interrupt(struct clock_event_device *dev); - /* * In high resolution mode the time reference must be read accurate */ @@ -283,6 +276,13 @@ static inline ktime_t hrtimer_expires_remaining(const struct hrtimer *timer) return ktime_sub(timer->_expires, timer->base->get_time()); } +#ifdef CONFIG_HIGH_RES_TIMERS +struct clock_event_device; + +extern void clock_was_set(void); +extern void hres_timers_resume(void); +extern void hrtimer_interrupt(struct clock_event_device *dev); + /* * The resolution of the clocks. The resolution value is returned in * the clock_getres() system call to give application programmers an -- cgit v1.2.3 From 2ec02270c00f94b08fddfb68c37510a9fb47ac7c Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 6 Sep 2008 09:36:56 -0700 Subject: hrtimer: another build fix More randconfig testing Signed-off-by: Arjan van de Ven --- include/linux/hrtimer.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index c407b33ef844..4c1a834b9849 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -198,19 +198,6 @@ struct hrtimer_cpu_base { #endif }; -/* - * In high resolution mode the time reference must be read accurate - */ -static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer) -{ - return timer->base->get_time(); -} - -static inline int hrtimer_is_hres_active(struct hrtimer *timer) -{ - return timer->base->cpu_base->hres_active; -} - static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) { timer->_expires = time; @@ -283,6 +270,19 @@ extern void clock_was_set(void); extern void hres_timers_resume(void); extern void hrtimer_interrupt(struct clock_event_device *dev); +/* + * In high resolution mode the time reference must be read accurate + */ +static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer) +{ + return timer->base->get_time(); +} + +static inline int hrtimer_is_hres_active(struct hrtimer *timer) +{ + return timer->base->cpu_base->hres_active; +} + /* * The resolution of the clocks. The resolution value is returned in * the clock_getres() system call to give application programmers an -- cgit v1.2.3 From da8f2e170ea94cc20f8ebbc8ee8d127edb8f12f1 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 7 Sep 2008 10:47:46 -0700 Subject: hrtimer: add a hrtimer_start_range() function this patch adds a _range version of hrtimer_start() so that range timers can be created; the hrtimer_start() function is just a wrapper around this. In addition, hrtimer_start_expires() will now preserve existing ranges. Signed-off-by: Arjan van de Ven --- include/linux/hrtimer.h | 9 ++++++++- kernel/hrtimer.c | 26 +++++++++++++++++++++++--- 2 files changed, 31 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 4c1a834b9849..1c0473e8ecb4 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -350,13 +350,20 @@ static inline void destroy_hrtimer_on_stack(struct hrtimer *timer) { } /* Basic timer operations: */ extern int hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode); +extern int hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, + unsigned long range_ns, const enum hrtimer_mode mode); extern int hrtimer_cancel(struct hrtimer *timer); extern int hrtimer_try_to_cancel(struct hrtimer *timer); static inline int hrtimer_start_expires(struct hrtimer *timer, enum hrtimer_mode mode) { - return hrtimer_start(timer, hrtimer_get_expires(timer), mode); + unsigned long delta; + ktime_t soft, hard; + soft = hrtimer_get_softexpires(timer); + hard = hrtimer_get_expires(timer); + delta = ktime_to_ns(ktime_sub(hard, soft)); + return hrtimer_start_range_ns(timer, hrtimer_get_expires(timer), delta, mode); } static inline int hrtimer_restart(struct hrtimer *timer) diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 01483004183d..a0222097c57e 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -945,9 +945,10 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base) } /** - * hrtimer_start - (re)start an relative timer on the current CPU + * hrtimer_start_range_ns - (re)start an relative timer on the current CPU * @timer: the timer to be added * @tim: expiry time + * @delta_ns: "slack" range for the timer * @mode: expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL) * * Returns: @@ -955,7 +956,8 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base) * 1 when the timer was active */ int -hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) +hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_ns, + const enum hrtimer_mode mode) { struct hrtimer_clock_base *base, *new_base; unsigned long flags; @@ -983,7 +985,7 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) #endif } - hrtimer_set_expires(timer, tim); + hrtimer_set_expires_range_ns(timer, tim, delta_ns); timer_stats_hrtimer_set_start_info(timer); @@ -1016,8 +1018,26 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) return ret; } +EXPORT_SYMBOL_GPL(hrtimer_start_range_ns); + +/** + * hrtimer_start - (re)start an relative timer on the current CPU + * @timer: the timer to be added + * @tim: expiry time + * @mode: expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL) + * + * Returns: + * 0 on success + * 1 when the timer was active + */ +int +hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) +{ + return hrtimer_start_range_ns(timer, tim, 0, mode); +} EXPORT_SYMBOL_GPL(hrtimer_start); + /** * hrtimer_try_to_cancel - try to deactivate a timer * @timer: hrtimer to stop -- cgit v1.2.3 From 4ce105d30e08fb8a1783c55a0e48aa3fa200c455 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 7 Sep 2008 15:31:39 -0700 Subject: hrtimer: incorporate feedback from Peter Zijlstra (based on lkml review) * use rt_task() * task_nice() has a sign Signed-off-by: Arjan van de Ven --- fs/select.c | 5 ++--- include/linux/hrtimer.h | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/fs/select.c b/fs/select.c index 5e61b43d0766..fdd8584e536d 100644 --- a/fs/select.c +++ b/fs/select.c @@ -46,7 +46,7 @@ static unsigned long __estimate_accuracy(struct timespec *tv) unsigned long slack; int divfactor = 1000; - if (task_nice(current)) + if (task_nice(current) > 0) divfactor = divfactor / 5; slack = tv->tv_nsec / divfactor; @@ -66,8 +66,7 @@ static unsigned long estimate_accuracy(struct timespec *tv) * Realtime tasks get a slack of 0 for obvious reasons. */ - if (current->policy == SCHED_FIFO || - current->policy == SCHED_RR) + if (rt_task(current)) return 0; ktime_get_ts(&now); diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 1c0473e8ecb4..95db11f62ff2 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -363,7 +363,7 @@ static inline int hrtimer_start_expires(struct hrtimer *timer, soft = hrtimer_get_softexpires(timer); hard = hrtimer_get_expires(timer); delta = ktime_to_ns(ktime_sub(hard, soft)); - return hrtimer_start_range_ns(timer, hrtimer_get_expires(timer), delta, mode); + return hrtimer_start_range_ns(timer, soft, delta, mode); } static inline int hrtimer_restart(struct hrtimer *timer) -- cgit v1.2.3 From 2e94d1f71f7e4404d997e6fb4f1618aa147d76f9 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 10 Sep 2008 16:06:00 -0700 Subject: hrtimer: peek at the timer queue just before going idle As part of going idle, we already look at the time of the next timer event to determine which C-state to select etc. This patch adds functionality that causes the timers that are past their soft expire time, to fire at this time, before we calculate the next wakeup time. This functionality will thus avoid wakeups by running timers before going idle rather than specially waking up for it. Signed-off-by: Arjan van de Ven --- drivers/cpuidle/cpuidle.c | 7 +++++++ include/linux/hrtimer.h | 5 +++++ kernel/hrtimer.c | 30 ++++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+) (limited to 'include') diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 5ce07b517c58..2e3148499368 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "cpuidle.h" @@ -60,6 +61,12 @@ static void cpuidle_idle_call(void) return; } + /* + * run any timers that can be run now, at this point + * before calculating the idle duration etc. + */ + hrtimer_peek_ahead_timers(); + /* ask the governor for the next state */ next_state = cpuidle_curr_governor->select(dev); if (need_resched()) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 95db11f62ff2..d93b1e1dc169 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -326,6 +326,11 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer) extern ktime_t ktime_get(void); extern ktime_t ktime_get_real(void); + +DECLARE_PER_CPU(struct tick_device, tick_cpu_device); +extern void hrtimer_peek_ahead_timers(void); + + /* Exported timer functions: */ /* Initialize timers: */ diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 9a4c90185566..eb2cf984959f 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1381,6 +1381,36 @@ void hrtimer_interrupt(struct clock_event_device *dev) raise_softirq(HRTIMER_SOFTIRQ); } +/** + * hrtimer_peek_ahead_timers -- run soft-expired timers now + * + * hrtimer_peek_ahead_timers will peek at the timer queue of + * the current cpu and check if there are any timers for which + * the soft expires time has passed. If any such timers exist, + * they are run immediately and then removed from the timer queue. + * + */ +void hrtimer_peek_ahead_timers(void) +{ + unsigned long flags; + struct tick_device *td; + struct clock_event_device *dev; + + if (hrtimer_hres_active()) + return; + + local_irq_save(flags); + td = &__get_cpu_var(tick_cpu_device); + if (!td) + goto out; + dev = td->evtdev; + if (!dev) + goto out; + hrtimer_interrupt(dev); +out: + local_irq_restore(flags); +} + static void run_hrtimer_softirq(struct softirq_action *h) { run_hrtimer_pending(&__get_cpu_var(hrtimer_bases)); -- cgit v1.2.3 From f06febc96ba8e0af80bcc3eaec0a109e88275fac Mon Sep 17 00:00:00 2001 From: Frank Mayhar Date: Fri, 12 Sep 2008 09:54:39 -0700 Subject: timers: fix itimer/many thread hang Overview This patch reworks the handling of POSIX CPU timers, including the ITIMER_PROF, ITIMER_VIRT timers and rlimit handling. It was put together with the help of Roland McGrath, the owner and original writer of this code. The problem we ran into, and the reason for this rework, has to do with using a profiling timer in a process with a large number of threads. It appears that the performance of the old implementation of run_posix_cpu_timers() was at least O(n*3) (where "n" is the number of threads in a process) or worse. Everything is fine with an increasing number of threads until the time taken for that routine to run becomes the same as or greater than the tick time, at which point things degrade rather quickly. This patch fixes bug 9906, "Weird hang with NPTL and SIGPROF." Code Changes This rework corrects the implementation of run_posix_cpu_timers() to make it run in constant time for a particular machine. (Performance may vary between one machine and another depending upon whether the kernel is built as single- or multiprocessor and, in the latter case, depending upon the number of running processors.) To do this, at each tick we now update fields in signal_struct as well as task_struct. The run_posix_cpu_timers() function uses those fields to make its decisions. We define a new structure, "task_cputime," to contain user, system and scheduler times and use these in appropriate places: struct task_cputime { cputime_t utime; cputime_t stime; unsigned long long sum_exec_runtime; }; This is included in the structure "thread_group_cputime," which is a new substructure of signal_struct and which varies for uniprocessor versus multiprocessor kernels. For uniprocessor kernels, it uses "task_cputime" as a simple substructure, while for multiprocessor kernels it is a pointer: struct thread_group_cputime { struct task_cputime totals; }; struct thread_group_cputime { struct task_cputime *totals; }; We also add a new task_cputime substructure directly to signal_struct, to cache the earliest expiration of process-wide timers, and task_cputime also replaces the it_*_expires fields of task_struct (used for earliest expiration of thread timers). The "thread_group_cputime" structure contains process-wide timers that are updated via account_user_time() and friends. In the non-SMP case the structure is a simple aggregator; unfortunately in the SMP case that simplicity was not achievable due to cache-line contention between CPUs (in one measured case performance was actually _worse_ on a 16-cpu system than the same test on a 4-cpu system, due to this contention). For SMP, the thread_group_cputime counters are maintained as a per-cpu structure allocated using alloc_percpu(). The timer functions update only the timer field in the structure corresponding to the running CPU, obtained using per_cpu_ptr(). We define a set of inline functions in sched.h that we use to maintain the thread_group_cputime structure and hide the differences between UP and SMP implementations from the rest of the kernel. The thread_group_cputime_init() function initializes the thread_group_cputime structure for the given task. The thread_group_cputime_alloc() is a no-op for UP; for SMP it calls the out-of-line function thread_group_cputime_alloc_smp() to allocate and fill in the per-cpu structures and fields. The thread_group_cputime_free() function, also a no-op for UP, in SMP frees the per-cpu structures. The thread_group_cputime_clone_thread() function (also a UP no-op) for SMP calls thread_group_cputime_alloc() if the per-cpu structures haven't yet been allocated. The thread_group_cputime() function fills the task_cputime structure it is passed with the contents of the thread_group_cputime fields; in UP it's that simple but in SMP it must also safely check that tsk->signal is non-NULL (if it is it just uses the appropriate fields of task_struct) and, if so, sums the per-cpu values for each online CPU. Finally, the three functions account_group_user_time(), account_group_system_time() and account_group_exec_runtime() are used by timer functions to update the respective fields of the thread_group_cputime structure. Non-SMP operation is trivial and will not be mentioned further. The per-cpu structure is always allocated when a task creates its first new thread, via a call to thread_group_cputime_clone_thread() from copy_signal(). It is freed at process exit via a call to thread_group_cputime_free() from cleanup_signal(). All functions that formerly summed utime/stime/sum_sched_runtime values from from all threads in the thread group now use thread_group_cputime() to snapshot the values in the thread_group_cputime structure or the values in the task structure itself if the per-cpu structure hasn't been allocated. Finally, the code in kernel/posix-cpu-timers.c has changed quite a bit. The run_posix_cpu_timers() function has been split into a fast path and a slow path; the former safely checks whether there are any expired thread timers and, if not, just returns, while the slow path does the heavy lifting. With the dedicated thread group fields, timers are no longer "rebalanced" and the process_timer_rebalance() function and related code has gone away. All summing loops are gone and all code that used them now uses the thread_group_cputime() inline. When process-wide timers are set, the new task_cputime structure in signal_struct is used to cache the earliest expiration; this is checked in the fast path. Performance The fix appears not to add significant overhead to existing operations. It generally performs the same as the current code except in two cases, one in which it performs slightly worse (Case 5 below) and one in which it performs very significantly better (Case 2 below). Overall it's a wash except in those two cases. I've since done somewhat more involved testing on a dual-core Opteron system. Case 1: With no itimer running, for a test with 100,000 threads, the fixed kernel took 1428.5 seconds, 513 seconds more than the unfixed system, all of which was spent in the system. There were twice as many voluntary context switches with the fix as without it. Case 2: With an itimer running at .01 second ticks and 4000 threads (the most an unmodified kernel can handle), the fixed kernel ran the test in eight percent of the time (5.8 seconds as opposed to 70 seconds) and had better tick accuracy (.012 seconds per tick as opposed to .023 seconds per tick). Case 3: A 4000-thread test with an initial timer tick of .01 second and an interval of 10,000 seconds (i.e. a timer that ticks only once) had very nearly the same performance in both cases: 6.3 seconds elapsed for the fixed kernel versus 5.5 seconds for the unfixed kernel. With fewer threads (eight in these tests), the Case 1 test ran in essentially the same time on both the modified and unmodified kernels (5.2 seconds versus 5.8 seconds). The Case 2 test ran in about the same time as well, 5.9 seconds versus 5.4 seconds but again with much better tick accuracy, .013 seconds per tick versus .025 seconds per tick for the unmodified kernel. Since the fix affected the rlimit code, I also tested soft and hard CPU limits. Case 4: With a hard CPU limit of 20 seconds and eight threads (and an itimer running), the modified kernel was very slightly favored in that while it killed the process in 19.997 seconds of CPU time (5.002 seconds of wall time), only .003 seconds of that was system time, the rest was user time. The unmodified kernel killed the process in 20.001 seconds of CPU (5.014 seconds of wall time) of which .016 seconds was system time. Really, though, the results were too close to call. The results were essentially the same with no itimer running. Case 5: With a soft limit of 20 seconds and a hard limit of 2000 seconds (where the hard limit would never be reached) and an itimer running, the modified kernel exhibited worse tick accuracy than the unmodified kernel: .050 seconds/tick versus .028 seconds/tick. Otherwise, performance was almost indistinguishable. With no itimer running this test exhibited virtually identical behavior and times in both cases. In times past I did some limited performance testing. those results are below. On a four-cpu Opteron system without this fix, a sixteen-thread test executed in 3569.991 seconds, of which user was 3568.435s and system was 1.556s. On the same system with the fix, user and elapsed time were about the same, but system time dropped to 0.007 seconds. Performance with eight, four and one thread were comparable. Interestingly, the timer ticks with the fix seemed more accurate: The sixteen-thread test with the fix received 149543 ticks for 0.024 seconds per tick, while the same test without the fix received 58720 for 0.061 seconds per tick. Both cases were configured for an interval of 0.01 seconds. Again, the other tests were comparable. Each thread in this test computed the primes up to 25,000,000. I also did a test with a large number of threads, 100,000 threads, which is impossible without the fix. In this case each thread computed the primes only up to 10,000 (to make the runtime manageable). System time dominated, at 1546.968 seconds out of a total 2176.906 seconds (giving a user time of 629.938s). It received 147651 ticks for 0.015 seconds per tick, still quite accurate. There is obviously no comparable test without the fix. Signed-off-by: Frank Mayhar Cc: Roland McGrath Cc: Alexey Dobriyan Cc: Andrew Morton Signed-off-by: Ingo Molnar --- fs/binfmt_elf.c | 19 +- fs/proc/array.c | 8 +- include/linux/posix-timers.h | 2 + include/linux/sched.h | 257 +++++++++++++++++++++-- include/linux/time.h | 3 + kernel/compat.c | 53 ++--- kernel/exit.c | 19 +- kernel/fork.c | 88 ++++---- kernel/itimer.c | 33 +-- kernel/posix-cpu-timers.c | 471 +++++++++++++++++++++++-------------------- kernel/sched.c | 53 ++++- kernel/sched_fair.c | 1 + kernel/sched_rt.c | 4 +- kernel/signal.c | 8 +- kernel/sys.c | 75 +++---- security/selinux/hooks.c | 9 +- 16 files changed, 677 insertions(+), 426 deletions(-) (limited to 'include') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 655ed8d30a86..a8635f637038 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1333,20 +1333,15 @@ static void fill_prstatus(struct elf_prstatus *prstatus, prstatus->pr_pgrp = task_pgrp_vnr(p); prstatus->pr_sid = task_session_vnr(p); if (thread_group_leader(p)) { + struct task_cputime cputime; + /* - * This is the record for the group leader. Add in the - * cumulative times of previous dead threads. This total - * won't include the time of each live thread whose state - * is included in the core dump. The final total reported - * to our parent process when it calls wait4 will include - * those sums as well as the little bit more time it takes - * this and each other thread to finish dying after the - * core dump synchronization phase. + * This is the record for the group leader. It shows the + * group-wide total, not its individual thread total. */ - cputime_to_timeval(cputime_add(p->utime, p->signal->utime), - &prstatus->pr_utime); - cputime_to_timeval(cputime_add(p->stime, p->signal->stime), - &prstatus->pr_stime); + thread_group_cputime(p, &cputime); + cputime_to_timeval(cputime.utime, &prstatus->pr_utime); + cputime_to_timeval(cputime.stime, &prstatus->pr_stime); } else { cputime_to_timeval(p->utime, &prstatus->pr_utime); cputime_to_timeval(p->stime, &prstatus->pr_stime); diff --git a/fs/proc/array.c b/fs/proc/array.c index 71c9be59c9c2..933953c4e407 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -395,20 +395,20 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, /* add up live thread stats at the group level */ if (whole) { + struct task_cputime cputime; struct task_struct *t = task; do { min_flt += t->min_flt; maj_flt += t->maj_flt; - utime = cputime_add(utime, task_utime(t)); - stime = cputime_add(stime, task_stime(t)); gtime = cputime_add(gtime, task_gtime(t)); t = next_thread(t); } while (t != task); min_flt += sig->min_flt; maj_flt += sig->maj_flt; - utime = cputime_add(utime, sig->utime); - stime = cputime_add(stime, sig->stime); + thread_group_cputime(task, &cputime); + utime = cputime.utime; + stime = cputime.stime; gtime = cputime_add(gtime, sig->gtime); } diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index a7dd38f30ade..f9d8e9e94e9b 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -115,4 +115,6 @@ void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx, long clock_nanosleep_restart(struct restart_block *restart_block); +void update_rlimit_cpu(unsigned long rlim_new); + #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 3d9120c5ad15..26d7a5f2d0ba 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -425,6 +425,45 @@ struct pacct_struct { unsigned long ac_minflt, ac_majflt; }; +/** + * struct task_cputime - collected CPU time counts + * @utime: time spent in user mode, in &cputime_t units + * @stime: time spent in kernel mode, in &cputime_t units + * @sum_exec_runtime: total time spent on the CPU, in nanoseconds + * + * This structure groups together three kinds of CPU time that are + * tracked for threads and thread groups. Most things considering + * CPU time want to group these counts together and treat all three + * of them in parallel. + */ +struct task_cputime { + cputime_t utime; + cputime_t stime; + unsigned long long sum_exec_runtime; +}; +/* Alternate field names when used to cache expirations. */ +#define prof_exp stime +#define virt_exp utime +#define sched_exp sum_exec_runtime + +/** + * struct thread_group_cputime - thread group interval timer counts + * @totals: thread group interval timers; substructure for + * uniprocessor kernel, per-cpu for SMP kernel. + * + * This structure contains the version of task_cputime, above, that is + * used for thread group CPU clock calculations. + */ +#ifdef CONFIG_SMP +struct thread_group_cputime { + struct task_cputime *totals; +}; +#else +struct thread_group_cputime { + struct task_cputime totals; +}; +#endif + /* * NOTE! "signal_struct" does not have it's own * locking, because a shared signal_struct always @@ -470,6 +509,17 @@ struct signal_struct { cputime_t it_prof_expires, it_virt_expires; cputime_t it_prof_incr, it_virt_incr; + /* + * Thread group totals for process CPU clocks. + * See thread_group_cputime(), et al, for details. + */ + struct thread_group_cputime cputime; + + /* Earliest-expiration cache. */ + struct task_cputime cputime_expires; + + struct list_head cpu_timers[3]; + /* job control IDs */ /* @@ -500,7 +550,7 @@ struct signal_struct { * Live threads maintain their own counters and add to these * in __exit_signal, except for the group leader. */ - cputime_t utime, stime, cutime, cstime; + cputime_t cutime, cstime; cputime_t gtime; cputime_t cgtime; unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; @@ -508,14 +558,6 @@ struct signal_struct { unsigned long inblock, oublock, cinblock, coublock; struct task_io_accounting ioac; - /* - * Cumulative ns of scheduled CPU time for dead threads in the - * group, not including a zombie group leader. (This only differs - * from jiffies_to_ns(utime + stime) if sched_clock uses something - * other than jiffies.) - */ - unsigned long long sum_sched_runtime; - /* * We don't bother to synchronize most readers of this at all, * because there is no reader checking a limit that actually needs @@ -527,8 +569,6 @@ struct signal_struct { */ struct rlimit rlim[RLIM_NLIMITS]; - struct list_head cpu_timers[3]; - /* keep the process-shared keyrings here so that they do the right * thing in threads created with CLONE_THREAD */ #ifdef CONFIG_KEYS @@ -1134,8 +1174,7 @@ struct task_struct { /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ unsigned long min_flt, maj_flt; - cputime_t it_prof_expires, it_virt_expires; - unsigned long long it_sched_expires; + struct task_cputime cputime_expires; struct list_head cpu_timers[3]; /* process credentials */ @@ -1585,6 +1624,7 @@ extern unsigned long long cpu_clock(int cpu); extern unsigned long long task_sched_runtime(struct task_struct *task); +extern unsigned long long thread_group_sched_runtime(struct task_struct *task); /* sched_exec is called by processes performing an exec */ #ifdef CONFIG_SMP @@ -2081,6 +2121,197 @@ static inline int spin_needbreak(spinlock_t *lock) #endif } +/* + * Thread group CPU time accounting. + */ +#ifdef CONFIG_SMP + +extern int thread_group_cputime_alloc_smp(struct task_struct *); +extern void thread_group_cputime_smp(struct task_struct *, struct task_cputime *); + +static inline void thread_group_cputime_init(struct signal_struct *sig) +{ + sig->cputime.totals = NULL; +} + +static inline int thread_group_cputime_clone_thread(struct task_struct *curr, + struct task_struct *new) +{ + if (curr->signal->cputime.totals) + return 0; + return thread_group_cputime_alloc_smp(curr); +} + +static inline void thread_group_cputime_free(struct signal_struct *sig) +{ + free_percpu(sig->cputime.totals); +} + +/** + * thread_group_cputime - Sum the thread group time fields across all CPUs. + * + * This is a wrapper for the real routine, thread_group_cputime_smp(). See + * that routine for details. + */ +static inline void thread_group_cputime( + struct task_struct *tsk, + struct task_cputime *times) +{ + thread_group_cputime_smp(tsk, times); +} + +/** + * thread_group_cputime_account_user - Maintain utime for a thread group. + * + * @tgtimes: Pointer to thread_group_cputime structure. + * @cputime: Time value by which to increment the utime field of that + * structure. + * + * If thread group time is being maintained, get the structure for the + * running CPU and update the utime field there. + */ +static inline void thread_group_cputime_account_user( + struct thread_group_cputime *tgtimes, + cputime_t cputime) +{ + if (tgtimes->totals) { + struct task_cputime *times; + + times = per_cpu_ptr(tgtimes->totals, get_cpu()); + times->utime = cputime_add(times->utime, cputime); + put_cpu_no_resched(); + } +} + +/** + * thread_group_cputime_account_system - Maintain stime for a thread group. + * + * @tgtimes: Pointer to thread_group_cputime structure. + * @cputime: Time value by which to increment the stime field of that + * structure. + * + * If thread group time is being maintained, get the structure for the + * running CPU and update the stime field there. + */ +static inline void thread_group_cputime_account_system( + struct thread_group_cputime *tgtimes, + cputime_t cputime) +{ + if (tgtimes->totals) { + struct task_cputime *times; + + times = per_cpu_ptr(tgtimes->totals, get_cpu()); + times->stime = cputime_add(times->stime, cputime); + put_cpu_no_resched(); + } +} + +/** + * thread_group_cputime_account_exec_runtime - Maintain exec runtime for a + * thread group. + * + * @tgtimes: Pointer to thread_group_cputime structure. + * @ns: Time value by which to increment the sum_exec_runtime field + * of that structure. + * + * If thread group time is being maintained, get the structure for the + * running CPU and update the sum_exec_runtime field there. + */ +static inline void thread_group_cputime_account_exec_runtime( + struct thread_group_cputime *tgtimes, + unsigned long long ns) +{ + if (tgtimes->totals) { + struct task_cputime *times; + + times = per_cpu_ptr(tgtimes->totals, get_cpu()); + times->sum_exec_runtime += ns; + put_cpu_no_resched(); + } +} + +#else /* CONFIG_SMP */ + +static inline void thread_group_cputime_init(struct signal_struct *sig) +{ + sig->cputime.totals.utime = cputime_zero; + sig->cputime.totals.stime = cputime_zero; + sig->cputime.totals.sum_exec_runtime = 0; +} + +static inline int thread_group_cputime_alloc(struct task_struct *tsk) +{ + return 0; +} + +static inline void thread_group_cputime_free(struct signal_struct *sig) +{ +} + +static inline int thread_group_cputime_clone_thread(struct task_struct *curr, + struct task_struct *tsk) +{ +} + +static inline void thread_group_cputime(struct task_struct *tsk, + struct task_cputime *cputime) +{ + *cputime = tsk->signal->cputime.totals; +} + +static inline void thread_group_cputime_account_user( + struct thread_group_cputime *tgtimes, + cputime_t cputime) +{ + tgtimes->totals->utime = cputime_add(tgtimes->totals->utime, cputime); +} + +static inline void thread_group_cputime_account_system( + struct thread_group_cputime *tgtimes, + cputime_t cputime) +{ + tgtimes->totals->stime = cputime_add(tgtimes->totals->stime, cputime); +} + +static inline void thread_group_cputime_account_exec_runtime( + struct thread_group_cputime *tgtimes, + unsigned long long ns) +{ + tgtimes->totals->sum_exec_runtime += ns; +} + +#endif /* CONFIG_SMP */ + +static inline void account_group_user_time(struct task_struct *tsk, + cputime_t cputime) +{ + struct signal_struct *sig; + + sig = tsk->signal; + if (likely(sig)) + thread_group_cputime_account_user(&sig->cputime, cputime); +} + +static inline void account_group_system_time(struct task_struct *tsk, + cputime_t cputime) +{ + struct signal_struct *sig; + + sig = tsk->signal; + if (likely(sig)) + thread_group_cputime_account_system(&sig->cputime, cputime); +} + +static inline void account_group_exec_runtime(struct task_struct *tsk, + unsigned long long ns) +{ + struct signal_struct *sig; + + sig = tsk->signal; + if (likely(sig)) + thread_group_cputime_account_exec_runtime(&sig->cputime, ns); +} + /* * Reevaluate whether the task has signals pending delivery. * Wake the task if so. diff --git a/include/linux/time.h b/include/linux/time.h index e15206a7e82e..1b70b3c293e9 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -125,6 +125,9 @@ extern int timekeeping_valid_for_hres(void); extern void update_wall_time(void); extern void update_xtime_cache(u64 nsec); +struct tms; +extern void do_sys_times(struct tms *); + /** * timespec_to_ns - Convert timespec to nanoseconds * @ts: pointer to the timespec variable to be converted diff --git a/kernel/compat.c b/kernel/compat.c index 32c254a8ab9a..72650e39b3e6 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -150,49 +151,23 @@ asmlinkage long compat_sys_setitimer(int which, return 0; } +static compat_clock_t clock_t_to_compat_clock_t(clock_t x) +{ + return compat_jiffies_to_clock_t(clock_t_to_jiffies(x)); +} + asmlinkage long compat_sys_times(struct compat_tms __user *tbuf) { - /* - * In the SMP world we might just be unlucky and have one of - * the times increment as we use it. Since the value is an - * atomically safe type this is just fine. Conceptually its - * as if the syscall took an instant longer to occur. - */ if (tbuf) { + struct tms tms; struct compat_tms tmp; - struct task_struct *tsk = current; - struct task_struct *t; - cputime_t utime, stime, cutime, cstime; - - read_lock(&tasklist_lock); - utime = tsk->signal->utime; - stime = tsk->signal->stime; - t = tsk; - do { - utime = cputime_add(utime, t->utime); - stime = cputime_add(stime, t->stime); - t = next_thread(t); - } while (t != tsk); - - /* - * While we have tasklist_lock read-locked, no dying thread - * can be updating current->signal->[us]time. Instead, - * we got their counts included in the live thread loop. - * However, another thread can come in right now and - * do a wait call that updates current->signal->c[us]time. - * To make sure we always see that pair updated atomically, - * we take the siglock around fetching them. - */ - spin_lock_irq(&tsk->sighand->siglock); - cutime = tsk->signal->cutime; - cstime = tsk->signal->cstime; - spin_unlock_irq(&tsk->sighand->siglock); - read_unlock(&tasklist_lock); - - tmp.tms_utime = compat_jiffies_to_clock_t(cputime_to_jiffies(utime)); - tmp.tms_stime = compat_jiffies_to_clock_t(cputime_to_jiffies(stime)); - tmp.tms_cutime = compat_jiffies_to_clock_t(cputime_to_jiffies(cutime)); - tmp.tms_cstime = compat_jiffies_to_clock_t(cputime_to_jiffies(cstime)); + + do_sys_times(&tms); + /* Convert our struct tms to the compat version. */ + tmp.tms_utime = clock_t_to_compat_clock_t(tms.tms_utime); + tmp.tms_stime = clock_t_to_compat_clock_t(tms.tms_stime); + tmp.tms_cutime = clock_t_to_compat_clock_t(tms.tms_cutime); + tmp.tms_cstime = clock_t_to_compat_clock_t(tms.tms_cstime); if (copy_to_user(tbuf, &tmp, sizeof(tmp))) return -EFAULT; } diff --git a/kernel/exit.c b/kernel/exit.c index 16395644a98f..40036ac04271 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -112,8 +112,6 @@ static void __exit_signal(struct task_struct *tsk) * We won't ever get here for the group leader, since it * will have been the last reference on the signal_struct. */ - sig->utime = cputime_add(sig->utime, task_utime(tsk)); - sig->stime = cputime_add(sig->stime, task_stime(tsk)); sig->gtime = cputime_add(sig->gtime, task_gtime(tsk)); sig->min_flt += tsk->min_flt; sig->maj_flt += tsk->maj_flt; @@ -122,7 +120,6 @@ static void __exit_signal(struct task_struct *tsk) sig->inblock += task_io_get_inblock(tsk); sig->oublock += task_io_get_oublock(tsk); task_io_accounting_add(&sig->ioac, &tsk->ioac); - sig->sum_sched_runtime += tsk->se.sum_exec_runtime; sig = NULL; /* Marker for below. */ } @@ -1294,6 +1291,7 @@ static int wait_task_zombie(struct task_struct *p, int options, if (likely(!traced)) { struct signal_struct *psig; struct signal_struct *sig; + struct task_cputime cputime; /* * The resource counters for the group leader are in its @@ -1309,20 +1307,23 @@ static int wait_task_zombie(struct task_struct *p, int options, * need to protect the access to p->parent->signal fields, * as other threads in the parent group can be right * here reaping other children at the same time. + * + * We use thread_group_cputime() to get times for the thread + * group, which consolidates times for all threads in the + * group including the group leader. */ spin_lock_irq(&p->parent->sighand->siglock); psig = p->parent->signal; sig = p->signal; + thread_group_cputime(p, &cputime); psig->cutime = cputime_add(psig->cutime, - cputime_add(p->utime, - cputime_add(sig->utime, - sig->cutime))); + cputime_add(cputime.utime, + sig->cutime)); psig->cstime = cputime_add(psig->cstime, - cputime_add(p->stime, - cputime_add(sig->stime, - sig->cstime))); + cputime_add(cputime.stime, + sig->cstime)); psig->cgtime = cputime_add(psig->cgtime, cputime_add(p->gtime, diff --git a/kernel/fork.c b/kernel/fork.c index 7ce2ebe84796..a8ac2efb8e30 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -759,15 +759,44 @@ void __cleanup_sighand(struct sighand_struct *sighand) kmem_cache_free(sighand_cachep, sighand); } + +/* + * Initialize POSIX timer handling for a thread group. + */ +static void posix_cpu_timers_init_group(struct signal_struct *sig) +{ + /* Thread group counters. */ + thread_group_cputime_init(sig); + + /* Expiration times and increments. */ + sig->it_virt_expires = cputime_zero; + sig->it_virt_incr = cputime_zero; + sig->it_prof_expires = cputime_zero; + sig->it_prof_incr = cputime_zero; + + /* Cached expiration times. */ + sig->cputime_expires.prof_exp = cputime_zero; + sig->cputime_expires.virt_exp = cputime_zero; + sig->cputime_expires.sched_exp = 0; + + /* The timer lists. */ + INIT_LIST_HEAD(&sig->cpu_timers[0]); + INIT_LIST_HEAD(&sig->cpu_timers[1]); + INIT_LIST_HEAD(&sig->cpu_timers[2]); +} + static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) { struct signal_struct *sig; int ret; if (clone_flags & CLONE_THREAD) { - atomic_inc(¤t->signal->count); - atomic_inc(¤t->signal->live); - return 0; + ret = thread_group_cputime_clone_thread(current, tsk); + if (likely(!ret)) { + atomic_inc(¤t->signal->count); + atomic_inc(¤t->signal->live); + } + return ret; } sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); tsk->signal = sig; @@ -795,15 +824,10 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) sig->it_real_incr.tv64 = 0; sig->real_timer.function = it_real_fn; - sig->it_virt_expires = cputime_zero; - sig->it_virt_incr = cputime_zero; - sig->it_prof_expires = cputime_zero; - sig->it_prof_incr = cputime_zero; - sig->leader = 0; /* session leadership doesn't inherit */ sig->tty_old_pgrp = NULL; - sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero; + sig->cutime = sig->cstime = cputime_zero; sig->gtime = cputime_zero; sig->cgtime = cputime_zero; sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; @@ -820,14 +844,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); task_unlock(current->group_leader); - if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) { - /* - * New sole thread in the process gets an expiry time - * of the whole CPU time limit. - */ - tsk->it_prof_expires = - secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur); - } + posix_cpu_timers_init_group(sig); + acct_init_pacct(&sig->pacct); tty_audit_fork(sig); @@ -837,6 +855,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) void __cleanup_signal(struct signal_struct *sig) { + thread_group_cputime_free(sig); exit_thread_group_keys(sig); kmem_cache_free(signal_cachep, sig); } @@ -885,6 +904,19 @@ void mm_init_owner(struct mm_struct *mm, struct task_struct *p) } #endif /* CONFIG_MM_OWNER */ +/* + * Initialize POSIX timer handling for a single task. + */ +static void posix_cpu_timers_init(struct task_struct *tsk) +{ + tsk->cputime_expires.prof_exp = cputime_zero; + tsk->cputime_expires.virt_exp = cputime_zero; + tsk->cputime_expires.sched_exp = 0; + INIT_LIST_HEAD(&tsk->cpu_timers[0]); + INIT_LIST_HEAD(&tsk->cpu_timers[1]); + INIT_LIST_HEAD(&tsk->cpu_timers[2]); +} + /* * This creates a new process as a copy of the old one, * but does not actually start it yet. @@ -995,12 +1027,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, task_io_accounting_init(&p->ioac); acct_clear_integrals(p); - p->it_virt_expires = cputime_zero; - p->it_prof_expires = cputime_zero; - p->it_sched_expires = 0; - INIT_LIST_HEAD(&p->cpu_timers[0]); - INIT_LIST_HEAD(&p->cpu_timers[1]); - INIT_LIST_HEAD(&p->cpu_timers[2]); + posix_cpu_timers_init(p); p->lock_depth = -1; /* -1 = no lock */ do_posix_clock_monotonic_gettime(&p->start_time); @@ -1201,21 +1228,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (clone_flags & CLONE_THREAD) { p->group_leader = current->group_leader; list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); - - if (!cputime_eq(current->signal->it_virt_expires, - cputime_zero) || - !cputime_eq(current->signal->it_prof_expires, - cputime_zero) || - current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY || - !list_empty(¤t->signal->cpu_timers[0]) || - !list_empty(¤t->signal->cpu_timers[1]) || - !list_empty(¤t->signal->cpu_timers[2])) { - /* - * Have child wake up on its first tick to check - * for process CPU timers. - */ - p->it_prof_expires = jiffies_to_cputime(1); - } } if (likely(p->pid)) { diff --git a/kernel/itimer.c b/kernel/itimer.c index ab982747d9bd..db7c358b9a02 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -55,17 +55,15 @@ int do_getitimer(int which, struct itimerval *value) spin_unlock_irq(&tsk->sighand->siglock); break; case ITIMER_VIRTUAL: - read_lock(&tasklist_lock); spin_lock_irq(&tsk->sighand->siglock); cval = tsk->signal->it_virt_expires; cinterval = tsk->signal->it_virt_incr; if (!cputime_eq(cval, cputime_zero)) { - struct task_struct *t = tsk; - cputime_t utime = tsk->signal->utime; - do { - utime = cputime_add(utime, t->utime); - t = next_thread(t); - } while (t != tsk); + struct task_cputime cputime; + cputime_t utime; + + thread_group_cputime(tsk, &cputime); + utime = cputime.utime; if (cputime_le(cval, utime)) { /* about to fire */ cval = jiffies_to_cputime(1); } else { @@ -73,25 +71,19 @@ int do_getitimer(int which, struct itimerval *value) } } spin_unlock_irq(&tsk->sighand->siglock); - read_unlock(&tasklist_lock); cputime_to_timeval(cval, &value->it_value); cputime_to_timeval(cinterval, &value->it_interval); break; case ITIMER_PROF: - read_lock(&tasklist_lock); spin_lock_irq(&tsk->sighand->siglock); cval = tsk->signal->it_prof_expires; cinterval = tsk->signal->it_prof_incr; if (!cputime_eq(cval, cputime_zero)) { - struct task_struct *t = tsk; - cputime_t ptime = cputime_add(tsk->signal->utime, - tsk->signal->stime); - do { - ptime = cputime_add(ptime, - cputime_add(t->utime, - t->stime)); - t = next_thread(t); - } while (t != tsk); + struct task_cputime times; + cputime_t ptime; + + thread_group_cputime(tsk, ×); + ptime = cputime_add(times.utime, times.stime); if (cputime_le(cval, ptime)) { /* about to fire */ cval = jiffies_to_cputime(1); } else { @@ -99,7 +91,6 @@ int do_getitimer(int which, struct itimerval *value) } } spin_unlock_irq(&tsk->sighand->siglock); - read_unlock(&tasklist_lock); cputime_to_timeval(cval, &value->it_value); cputime_to_timeval(cinterval, &value->it_interval); break; @@ -185,7 +176,6 @@ again: case ITIMER_VIRTUAL: nval = timeval_to_cputime(&value->it_value); ninterval = timeval_to_cputime(&value->it_interval); - read_lock(&tasklist_lock); spin_lock_irq(&tsk->sighand->siglock); cval = tsk->signal->it_virt_expires; cinterval = tsk->signal->it_virt_incr; @@ -200,7 +190,6 @@ again: tsk->signal->it_virt_expires = nval; tsk->signal->it_virt_incr = ninterval; spin_unlock_irq(&tsk->sighand->siglock); - read_unlock(&tasklist_lock); if (ovalue) { cputime_to_timeval(cval, &ovalue->it_value); cputime_to_timeval(cinterval, &ovalue->it_interval); @@ -209,7 +198,6 @@ again: case ITIMER_PROF: nval = timeval_to_cputime(&value->it_value); ninterval = timeval_to_cputime(&value->it_interval); - read_lock(&tasklist_lock); spin_lock_irq(&tsk->sighand->siglock); cval = tsk->signal->it_prof_expires; cinterval = tsk->signal->it_prof_incr; @@ -224,7 +212,6 @@ again: tsk->signal->it_prof_expires = nval; tsk->signal->it_prof_incr = ninterval; spin_unlock_irq(&tsk->sighand->siglock); - read_unlock(&tasklist_lock); if (ovalue) { cputime_to_timeval(cval, &ovalue->it_value); cputime_to_timeval(cinterval, &ovalue->it_interval); diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index c42a03aef36f..dba1c334c3e8 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -8,6 +8,99 @@ #include #include +#ifdef CONFIG_SMP +/* + * Allocate the thread_group_cputime structure appropriately for SMP kernels + * and fill in the current values of the fields. Called from copy_signal() + * via thread_group_cputime_clone_thread() when adding a second or subsequent + * thread to a thread group. Assumes interrupts are enabled when called. + */ +int thread_group_cputime_alloc_smp(struct task_struct *tsk) +{ + struct signal_struct *sig = tsk->signal; + struct task_cputime *cputime; + + /* + * If we have multiple threads and we don't already have a + * per-CPU task_cputime struct, allocate one and fill it in with + * the times accumulated so far. + */ + if (sig->cputime.totals) + return 0; + cputime = alloc_percpu(struct task_cputime); + if (cputime == NULL) + return -ENOMEM; + read_lock(&tasklist_lock); + spin_lock_irq(&tsk->sighand->siglock); + if (sig->cputime.totals) { + spin_unlock_irq(&tsk->sighand->siglock); + read_unlock(&tasklist_lock); + free_percpu(cputime); + return 0; + } + sig->cputime.totals = cputime; + cputime = per_cpu_ptr(sig->cputime.totals, get_cpu()); + cputime->utime = tsk->utime; + cputime->stime = tsk->stime; + cputime->sum_exec_runtime = tsk->se.sum_exec_runtime; + put_cpu_no_resched(); + spin_unlock_irq(&tsk->sighand->siglock); + read_unlock(&tasklist_lock); + return 0; +} + +/** + * thread_group_cputime_smp - Sum the thread group time fields across all CPUs. + * + * @tsk: The task we use to identify the thread group. + * @times: task_cputime structure in which we return the summed fields. + * + * Walk the list of CPUs to sum the per-CPU time fields in the thread group + * time structure. + */ +void thread_group_cputime_smp( + struct task_struct *tsk, + struct task_cputime *times) +{ + struct signal_struct *sig; + int i; + struct task_cputime *tot; + + sig = tsk->signal; + if (unlikely(!sig) || !sig->cputime.totals) { + times->utime = tsk->utime; + times->stime = tsk->stime; + times->sum_exec_runtime = tsk->se.sum_exec_runtime; + return; + } + times->stime = times->utime = cputime_zero; + times->sum_exec_runtime = 0; + for_each_possible_cpu(i) { + tot = per_cpu_ptr(tsk->signal->cputime.totals, i); + times->utime = cputime_add(times->utime, tot->utime); + times->stime = cputime_add(times->stime, tot->stime); + times->sum_exec_runtime += tot->sum_exec_runtime; + } +} + +#endif /* CONFIG_SMP */ + +/* + * Called after updating RLIMIT_CPU to set timer expiration if necessary. + */ +void update_rlimit_cpu(unsigned long rlim_new) +{ + cputime_t cputime; + + cputime = secs_to_cputime(rlim_new); + if (cputime_eq(current->signal->it_prof_expires, cputime_zero) || + cputime_lt(current->signal->it_prof_expires, cputime)) { + spin_lock_irq(¤t->sighand->siglock); + set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL); + spin_unlock_irq(¤t->sighand->siglock); + } +} + static int check_clock(const clockid_t which_clock) { int error = 0; @@ -158,10 +251,6 @@ static inline cputime_t virt_ticks(struct task_struct *p) { return p->utime; } -static inline unsigned long long sched_ns(struct task_struct *p) -{ - return task_sched_runtime(p); -} int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp) { @@ -211,7 +300,7 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p, cpu->cpu = virt_ticks(p); break; case CPUCLOCK_SCHED: - cpu->sched = sched_ns(p); + cpu->sched = task_sched_runtime(p); break; } return 0; @@ -226,31 +315,20 @@ static int cpu_clock_sample_group_locked(unsigned int clock_idx, struct task_struct *p, union cpu_time_count *cpu) { - struct task_struct *t = p; - switch (clock_idx) { + struct task_cputime cputime; + + thread_group_cputime(p, &cputime); + switch (clock_idx) { default: return -EINVAL; case CPUCLOCK_PROF: - cpu->cpu = cputime_add(p->signal->utime, p->signal->stime); - do { - cpu->cpu = cputime_add(cpu->cpu, prof_ticks(t)); - t = next_thread(t); - } while (t != p); + cpu->cpu = cputime_add(cputime.utime, cputime.stime); break; case CPUCLOCK_VIRT: - cpu->cpu = p->signal->utime; - do { - cpu->cpu = cputime_add(cpu->cpu, virt_ticks(t)); - t = next_thread(t); - } while (t != p); + cpu->cpu = cputime.utime; break; case CPUCLOCK_SCHED: - cpu->sched = p->signal->sum_sched_runtime; - /* Add in each other live thread. */ - while ((t = next_thread(t)) != p) { - cpu->sched += t->se.sum_exec_runtime; - } - cpu->sched += sched_ns(p); + cpu->sched = thread_group_sched_runtime(p); break; } return 0; @@ -471,80 +549,11 @@ void posix_cpu_timers_exit(struct task_struct *tsk) } void posix_cpu_timers_exit_group(struct task_struct *tsk) { - cleanup_timers(tsk->signal->cpu_timers, - cputime_add(tsk->utime, tsk->signal->utime), - cputime_add(tsk->stime, tsk->signal->stime), - tsk->se.sum_exec_runtime + tsk->signal->sum_sched_runtime); -} - - -/* - * Set the expiry times of all the threads in the process so one of them - * will go off before the process cumulative expiry total is reached. - */ -static void process_timer_rebalance(struct task_struct *p, - unsigned int clock_idx, - union cpu_time_count expires, - union cpu_time_count val) -{ - cputime_t ticks, left; - unsigned long long ns, nsleft; - struct task_struct *t = p; - unsigned int nthreads = atomic_read(&p->signal->live); - - if (!nthreads) - return; + struct task_cputime cputime; - switch (clock_idx) { - default: - BUG(); - break; - case CPUCLOCK_PROF: - left = cputime_div_non_zero(cputime_sub(expires.cpu, val.cpu), - nthreads); - do { - if (likely(!(t->flags & PF_EXITING))) { - ticks = cputime_add(prof_ticks(t), left); - if (cputime_eq(t->it_prof_expires, - cputime_zero) || - cputime_gt(t->it_prof_expires, ticks)) { - t->it_prof_expires = ticks; - } - } - t = next_thread(t); - } while (t != p); - break; - case CPUCLOCK_VIRT: - left = cputime_div_non_zero(cputime_sub(expires.cpu, val.cpu), - nthreads); - do { - if (likely(!(t->flags & PF_EXITING))) { - ticks = cputime_add(virt_ticks(t), left); - if (cputime_eq(t->it_virt_expires, - cputime_zero) || - cputime_gt(t->it_virt_expires, ticks)) { - t->it_virt_expires = ticks; - } - } - t = next_thread(t); - } while (t != p); - break; - case CPUCLOCK_SCHED: - nsleft = expires.sched - val.sched; - do_div(nsleft, nthreads); - nsleft = max_t(unsigned long long, nsleft, 1); - do { - if (likely(!(t->flags & PF_EXITING))) { - ns = t->se.sum_exec_runtime + nsleft; - if (t->it_sched_expires == 0 || - t->it_sched_expires > ns) { - t->it_sched_expires = ns; - } - } - t = next_thread(t); - } while (t != p); - break; - } + thread_group_cputime(tsk, &cputime); + cleanup_timers(tsk->signal->cpu_timers, + cputime.utime, cputime.stime, cputime.sum_exec_runtime); } static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now) @@ -608,29 +617,32 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now) default: BUG(); case CPUCLOCK_PROF: - if (cputime_eq(p->it_prof_expires, + if (cputime_eq(p->cputime_expires.prof_exp, cputime_zero) || - cputime_gt(p->it_prof_expires, + cputime_gt(p->cputime_expires.prof_exp, nt->expires.cpu)) - p->it_prof_expires = nt->expires.cpu; + p->cputime_expires.prof_exp = + nt->expires.cpu; break; case CPUCLOCK_VIRT: - if (cputime_eq(p->it_virt_expires, + if (cputime_eq(p->cputime_expires.virt_exp, cputime_zero) || - cputime_gt(p->it_virt_expires, + cputime_gt(p->cputime_expires.virt_exp, nt->expires.cpu)) - p->it_virt_expires = nt->expires.cpu; + p->cputime_expires.virt_exp = + nt->expires.cpu; break; case CPUCLOCK_SCHED: - if (p->it_sched_expires == 0 || - p->it_sched_expires > nt->expires.sched) - p->it_sched_expires = nt->expires.sched; + if (p->cputime_expires.sched_exp == 0 || + p->cputime_expires.sched_exp > + nt->expires.sched) + p->cputime_expires.sched_exp = + nt->expires.sched; break; } } else { /* - * For a process timer, we must balance - * all the live threads' expirations. + * For a process timer, set the cached expiration time. */ switch (CPUCLOCK_WHICH(timer->it_clock)) { default: @@ -641,7 +653,9 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now) cputime_lt(p->signal->it_virt_expires, timer->it.cpu.expires.cpu)) break; - goto rebalance; + p->signal->cputime_expires.virt_exp = + timer->it.cpu.expires.cpu; + break; case CPUCLOCK_PROF: if (!cputime_eq(p->signal->it_prof_expires, cputime_zero) && @@ -652,13 +666,12 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now) if (i != RLIM_INFINITY && i <= cputime_to_secs(timer->it.cpu.expires.cpu)) break; - goto rebalance; + p->signal->cputime_expires.prof_exp = + timer->it.cpu.expires.cpu; + break; case CPUCLOCK_SCHED: - rebalance: - process_timer_rebalance( - timer->it.cpu.task, - CPUCLOCK_WHICH(timer->it_clock), - timer->it.cpu.expires, now); + p->signal->cputime_expires.sched_exp = + timer->it.cpu.expires.sched; break; } } @@ -969,13 +982,13 @@ static void check_thread_timers(struct task_struct *tsk, struct signal_struct *const sig = tsk->signal; maxfire = 20; - tsk->it_prof_expires = cputime_zero; + tsk->cputime_expires.prof_exp = cputime_zero; while (!list_empty(timers)) { struct cpu_timer_list *t = list_first_entry(timers, struct cpu_timer_list, entry); if (!--maxfire || cputime_lt(prof_ticks(tsk), t->expires.cpu)) { - tsk->it_prof_expires = t->expires.cpu; + tsk->cputime_expires.prof_exp = t->expires.cpu; break; } t->firing = 1; @@ -984,13 +997,13 @@ static void check_thread_timers(struct task_struct *tsk, ++timers; maxfire = 20; - tsk->it_virt_expires = cputime_zero; + tsk->cputime_expires.virt_exp = cputime_zero; while (!list_empty(timers)) { struct cpu_timer_list *t = list_first_entry(timers, struct cpu_timer_list, entry); if (!--maxfire || cputime_lt(virt_ticks(tsk), t->expires.cpu)) { - tsk->it_virt_expires = t->expires.cpu; + tsk->cputime_expires.virt_exp = t->expires.cpu; break; } t->firing = 1; @@ -999,13 +1012,13 @@ static void check_thread_timers(struct task_struct *tsk, ++timers; maxfire = 20; - tsk->it_sched_expires = 0; + tsk->cputime_expires.sched_exp = 0; while (!list_empty(timers)) { struct cpu_timer_list *t = list_first_entry(timers, struct cpu_timer_list, entry); if (!--maxfire || tsk->se.sum_exec_runtime < t->expires.sched) { - tsk->it_sched_expires = t->expires.sched; + tsk->cputime_expires.sched_exp = t->expires.sched; break; } t->firing = 1; @@ -1055,10 +1068,10 @@ static void check_process_timers(struct task_struct *tsk, { int maxfire; struct signal_struct *const sig = tsk->signal; - cputime_t utime, stime, ptime, virt_expires, prof_expires; + cputime_t utime, ptime, virt_expires, prof_expires; unsigned long long sum_sched_runtime, sched_expires; - struct task_struct *t; struct list_head *timers = sig->cpu_timers; + struct task_cputime cputime; /* * Don't sample the current process CPU clocks if there are no timers. @@ -1074,18 +1087,10 @@ static void check_process_timers(struct task_struct *tsk, /* * Collect the current process totals. */ - utime = sig->utime; - stime = sig->stime; - sum_sched_runtime = sig->sum_sched_runtime; - t = tsk; - do { - utime = cputime_add(utime, t->utime); - stime = cputime_add(stime, t->stime); - sum_sched_runtime += t->se.sum_exec_runtime; - t = next_thread(t); - } while (t != tsk); - ptime = cputime_add(utime, stime); - + thread_group_cputime(tsk, &cputime); + utime = cputime.utime; + ptime = cputime_add(utime, cputime.stime); + sum_sched_runtime = cputime.sum_exec_runtime; maxfire = 20; prof_expires = cputime_zero; while (!list_empty(timers)) { @@ -1193,60 +1198,18 @@ static void check_process_timers(struct task_struct *tsk, } } - if (!cputime_eq(prof_expires, cputime_zero) || - !cputime_eq(virt_expires, cputime_zero) || - sched_expires != 0) { - /* - * Rebalance the threads' expiry times for the remaining - * process CPU timers. - */ - - cputime_t prof_left, virt_left, ticks; - unsigned long long sched_left, sched; - const unsigned int nthreads = atomic_read(&sig->live); - - if (!nthreads) - return; - - prof_left = cputime_sub(prof_expires, utime); - prof_left = cputime_sub(prof_left, stime); - prof_left = cputime_div_non_zero(prof_left, nthreads); - virt_left = cputime_sub(virt_expires, utime); - virt_left = cputime_div_non_zero(virt_left, nthreads); - if (sched_expires) { - sched_left = sched_expires - sum_sched_runtime; - do_div(sched_left, nthreads); - sched_left = max_t(unsigned long long, sched_left, 1); - } else { - sched_left = 0; - } - t = tsk; - do { - if (unlikely(t->flags & PF_EXITING)) - continue; - - ticks = cputime_add(cputime_add(t->utime, t->stime), - prof_left); - if (!cputime_eq(prof_expires, cputime_zero) && - (cputime_eq(t->it_prof_expires, cputime_zero) || - cputime_gt(t->it_prof_expires, ticks))) { - t->it_prof_expires = ticks; - } - - ticks = cputime_add(t->utime, virt_left); - if (!cputime_eq(virt_expires, cputime_zero) && - (cputime_eq(t->it_virt_expires, cputime_zero) || - cputime_gt(t->it_virt_expires, ticks))) { - t->it_virt_expires = ticks; - } - - sched = t->se.sum_exec_runtime + sched_left; - if (sched_expires && (t->it_sched_expires == 0 || - t->it_sched_expires > sched)) { - t->it_sched_expires = sched; - } - } while ((t = next_thread(t)) != tsk); - } + if (!cputime_eq(prof_expires, cputime_zero) && + (cputime_eq(sig->cputime_expires.prof_exp, cputime_zero) || + cputime_gt(sig->cputime_expires.prof_exp, prof_expires))) + sig->cputime_expires.prof_exp = prof_expires; + if (!cputime_eq(virt_expires, cputime_zero) && + (cputime_eq(sig->cputime_expires.virt_exp, cputime_zero) || + cputime_gt(sig->cputime_expires.virt_exp, virt_expires))) + sig->cputime_expires.virt_exp = virt_expires; + if (sched_expires != 0 && + (sig->cputime_expires.sched_exp == 0 || + sig->cputime_expires.sched_exp > sched_expires)) + sig->cputime_expires.sched_exp = sched_expires; } /* @@ -1314,6 +1277,78 @@ out: ++timer->it_requeue_pending; } +/** + * task_cputime_zero - Check a task_cputime struct for all zero fields. + * + * @cputime: The struct to compare. + * + * Checks @cputime to see if all fields are zero. Returns true if all fields + * are zero, false if any field is nonzero. + */ +static inline int task_cputime_zero(const struct task_cputime *cputime) +{ + if (cputime_eq(cputime->utime, cputime_zero) && + cputime_eq(cputime->stime, cputime_zero) && + cputime->sum_exec_runtime == 0) + return 1; + return 0; +} + +/** + * task_cputime_expired - Compare two task_cputime entities. + * + * @sample: The task_cputime structure to be checked for expiration. + * @expires: Expiration times, against which @sample will be checked. + * + * Checks @sample against @expires to see if any field of @sample has expired. + * Returns true if any field of the former is greater than the corresponding + * field of the latter if the latter field is set. Otherwise returns false. + */ +static inline int task_cputime_expired(const struct task_cputime *sample, + const struct task_cputime *expires) +{ + if (!cputime_eq(expires->utime, cputime_zero) && + cputime_ge(sample->utime, expires->utime)) + return 1; + if (!cputime_eq(expires->stime, cputime_zero) && + cputime_ge(cputime_add(sample->utime, sample->stime), + expires->stime)) + return 1; + if (expires->sum_exec_runtime != 0 && + sample->sum_exec_runtime >= expires->sum_exec_runtime) + return 1; + return 0; +} + +/** + * fastpath_timer_check - POSIX CPU timers fast path. + * + * @tsk: The task (thread) being checked. + * @sig: The signal pointer for that task. + * + * If there are no timers set return false. Otherwise snapshot the task and + * thread group timers, then compare them with the corresponding expiration + # times. Returns true if a timer has expired, else returns false. + */ +static inline int fastpath_timer_check(struct task_struct *tsk, + struct signal_struct *sig) +{ + struct task_cputime task_sample = { + .utime = tsk->utime, + .stime = tsk->stime, + .sum_exec_runtime = tsk->se.sum_exec_runtime + }; + struct task_cputime group_sample; + + if (task_cputime_zero(&tsk->cputime_expires) && + task_cputime_zero(&sig->cputime_expires)) + return 0; + if (task_cputime_expired(&task_sample, &tsk->cputime_expires)) + return 1; + thread_group_cputime(tsk, &group_sample); + return task_cputime_expired(&group_sample, &sig->cputime_expires); +} + /* * This is called from the timer interrupt handler. The irq handler has * already updated our counts. We need to check if any timers fire now. @@ -1323,30 +1358,29 @@ void run_posix_cpu_timers(struct task_struct *tsk) { LIST_HEAD(firing); struct k_itimer *timer, *next; + struct signal_struct *sig; + struct sighand_struct *sighand; + unsigned long flags; BUG_ON(!irqs_disabled()); -#define UNEXPIRED(clock) \ - (cputime_eq(tsk->it_##clock##_expires, cputime_zero) || \ - cputime_lt(clock##_ticks(tsk), tsk->it_##clock##_expires)) - - if (UNEXPIRED(prof) && UNEXPIRED(virt) && - (tsk->it_sched_expires == 0 || - tsk->se.sum_exec_runtime < tsk->it_sched_expires)) - return; - -#undef UNEXPIRED - + /* Pick up tsk->signal and make sure it's valid. */ + sig = tsk->signal; /* - * Double-check with locks held. + * The fast path checks that there are no expired thread or thread + * group timers. If that's so, just return. Also check that + * tsk->signal is non-NULL; this probably can't happen but cover the + * possibility anyway. */ - read_lock(&tasklist_lock); - if (likely(tsk->signal != NULL)) { - spin_lock(&tsk->sighand->siglock); - + if (unlikely(!sig) || !fastpath_timer_check(tsk, sig)) { + return; + } + sighand = lock_task_sighand(tsk, &flags); + if (likely(sighand)) { /* - * Here we take off tsk->cpu_timers[N] and tsk->signal->cpu_timers[N] - * all the timers that are firing, and put them on the firing list. + * Here we take off tsk->signal->cpu_timers[N] and + * tsk->cpu_timers[N] all the timers that are firing, and + * put them on the firing list. */ check_thread_timers(tsk, &firing); check_process_timers(tsk, &firing); @@ -1359,9 +1393,8 @@ void run_posix_cpu_timers(struct task_struct *tsk) * that gets the timer lock before we do will give it up and * spin until we've taken care of that timer below. */ - spin_unlock(&tsk->sighand->siglock); } - read_unlock(&tasklist_lock); + unlock_task_sighand(tsk, &flags); /* * Now that all the timers on our list have the firing flag, @@ -1389,10 +1422,9 @@ void run_posix_cpu_timers(struct task_struct *tsk) /* * Set one of the process-wide special case CPU timers. - * The tasklist_lock and tsk->sighand->siglock must be held by the caller. - * The oldval argument is null for the RLIMIT_CPU timer, where *newval is - * absolute; non-null for ITIMER_*, where *newval is relative and we update - * it to be absolute, *oldval is absolute and we update it to be relative. + * The tsk->sighand->siglock must be held by the caller. + * The *newval argument is relative and we update it to be absolute, *oldval + * is absolute and we update it to be relative. */ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, cputime_t *newval, cputime_t *oldval) @@ -1435,13 +1467,14 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, cputime_ge(list_first_entry(head, struct cpu_timer_list, entry)->expires.cpu, *newval)) { - /* - * Rejigger each thread's expiry time so that one will - * notice before we hit the process-cumulative expiry time. - */ - union cpu_time_count expires = { .sched = 0 }; - expires.cpu = *newval; - process_timer_rebalance(tsk, clock_idx, expires, now); + switch (clock_idx) { + case CPUCLOCK_PROF: + tsk->signal->cputime_expires.prof_exp = *newval; + break; + case CPUCLOCK_VIRT: + tsk->signal->cputime_expires.virt_exp = *newval; + break; + } } } diff --git a/kernel/sched.c b/kernel/sched.c index cc1f81b50b82..c51b5d276665 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4036,6 +4036,25 @@ DEFINE_PER_CPU(struct kernel_stat, kstat); EXPORT_PER_CPU_SYMBOL(kstat); +/* + * Return any ns on the sched_clock that have not yet been banked in + * @p in case that task is currently running. + * + * Called with task_rq_lock() held on @rq. + */ +static unsigned long long task_delta_exec(struct task_struct *p, struct rq *rq) +{ + if (task_current(rq, p)) { + u64 delta_exec; + + update_rq_clock(rq); + delta_exec = rq->clock - p->se.exec_start; + if ((s64)delta_exec > 0) + return delta_exec; + } + return 0; +} + /* * Return p->sum_exec_runtime plus any more ns on the sched_clock * that have not yet been banked in case the task is currently running. @@ -4043,17 +4062,31 @@ EXPORT_PER_CPU_SYMBOL(kstat); unsigned long long task_sched_runtime(struct task_struct *p) { unsigned long flags; - u64 ns, delta_exec; + u64 ns; struct rq *rq; rq = task_rq_lock(p, &flags); - ns = p->se.sum_exec_runtime; - if (task_current(rq, p)) { - update_rq_clock(rq); - delta_exec = rq->clock - p->se.exec_start; - if ((s64)delta_exec > 0) - ns += delta_exec; - } + ns = p->se.sum_exec_runtime + task_delta_exec(p, rq); + task_rq_unlock(rq, &flags); + + return ns; +} + +/* + * Return sum_exec_runtime for the thread group plus any more ns on the + * sched_clock that have not yet been banked in case the task is currently + * running. + */ +unsigned long long thread_group_sched_runtime(struct task_struct *p) +{ + unsigned long flags; + u64 ns; + struct rq *rq; + struct task_cputime totals; + + rq = task_rq_lock(p, &flags); + thread_group_cputime(p, &totals); + ns = totals.sum_exec_runtime + task_delta_exec(p, rq); task_rq_unlock(rq, &flags); return ns; @@ -4070,6 +4103,7 @@ void account_user_time(struct task_struct *p, cputime_t cputime) cputime64_t tmp; p->utime = cputime_add(p->utime, cputime); + account_group_user_time(p, cputime); /* Add user time to cpustat. */ tmp = cputime_to_cputime64(cputime); @@ -4094,6 +4128,7 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime) tmp = cputime_to_cputime64(cputime); p->utime = cputime_add(p->utime, cputime); + account_group_user_time(p, cputime); p->gtime = cputime_add(p->gtime, cputime); cpustat->user = cputime64_add(cpustat->user, tmp); @@ -4129,6 +4164,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset, } p->stime = cputime_add(p->stime, cputime); + account_group_system_time(p, cputime); /* Add system time to cpustat. */ tmp = cputime_to_cputime64(cputime); @@ -4170,6 +4206,7 @@ void account_steal_time(struct task_struct *p, cputime_t steal) if (p == rq->idle) { p->stime = cputime_add(p->stime, steal); + account_group_system_time(p, steal); if (atomic_read(&rq->nr_iowait) > 0) cpustat->iowait = cputime64_add(cpustat->iowait, tmp); else diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index fb8994c6d4bb..99aa31acc544 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -507,6 +507,7 @@ static void update_curr(struct cfs_rq *cfs_rq) struct task_struct *curtask = task_of(curr); cpuacct_charge(curtask, delta_exec); + account_group_exec_runtime(curtask, delta_exec); } } diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 552310798dad..8375e69af36a 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -483,6 +483,8 @@ static void update_curr_rt(struct rq *rq) schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec)); curr->se.sum_exec_runtime += delta_exec; + account_group_exec_runtime(curr, delta_exec); + curr->se.exec_start = rq->clock; cpuacct_charge(curr, delta_exec); @@ -1412,7 +1414,7 @@ static void watchdog(struct rq *rq, struct task_struct *p) p->rt.timeout++; next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ); if (p->rt.timeout > next) - p->it_sched_expires = p->se.sum_exec_runtime; + p->cputime_expires.sched_exp = p->se.sum_exec_runtime; } } diff --git a/kernel/signal.c b/kernel/signal.c index e661b01d340f..6eea5826d618 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1338,6 +1338,7 @@ int do_notify_parent(struct task_struct *tsk, int sig) struct siginfo info; unsigned long flags; struct sighand_struct *psig; + struct task_cputime cputime; int ret = sig; BUG_ON(sig == -1); @@ -1368,10 +1369,9 @@ int do_notify_parent(struct task_struct *tsk, int sig) info.si_uid = tsk->uid; - info.si_utime = cputime_to_clock_t(cputime_add(tsk->utime, - tsk->signal->utime)); - info.si_stime = cputime_to_clock_t(cputime_add(tsk->stime, - tsk->signal->stime)); + thread_group_cputime(tsk, &cputime); + info.si_utime = cputime_to_jiffies(cputime.utime); + info.si_stime = cputime_to_jiffies(cputime.stime); info.si_status = tsk->exit_code & 0x7f; if (tsk->exit_code & 0x80) diff --git a/kernel/sys.c b/kernel/sys.c index 038a7bc0901d..d046a7a055c2 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -853,38 +853,28 @@ asmlinkage long sys_setfsgid(gid_t gid) return old_fsgid; } +void do_sys_times(struct tms *tms) +{ + struct task_cputime cputime; + cputime_t cutime, cstime; + + spin_lock_irq(¤t->sighand->siglock); + thread_group_cputime(current, &cputime); + cutime = current->signal->cutime; + cstime = current->signal->cstime; + spin_unlock_irq(¤t->sighand->siglock); + tms->tms_utime = cputime_to_clock_t(cputime.utime); + tms->tms_stime = cputime_to_clock_t(cputime.stime); + tms->tms_cutime = cputime_to_clock_t(cutime); + tms->tms_cstime = cputime_to_clock_t(cstime); +} + asmlinkage long sys_times(struct tms __user * tbuf) { - /* - * In the SMP world we might just be unlucky and have one of - * the times increment as we use it. Since the value is an - * atomically safe type this is just fine. Conceptually its - * as if the syscall took an instant longer to occur. - */ if (tbuf) { struct tms tmp; - struct task_struct *tsk = current; - struct task_struct *t; - cputime_t utime, stime, cutime, cstime; - - spin_lock_irq(&tsk->sighand->siglock); - utime = tsk->signal->utime; - stime = tsk->signal->stime; - t = tsk; - do { - utime = cputime_add(utime, t->utime); - stime = cputime_add(stime, t->stime); - t = next_thread(t); - } while (t != tsk); - - cutime = tsk->signal->cutime; - cstime = tsk->signal->cstime; - spin_unlock_irq(&tsk->sighand->siglock); - - tmp.tms_utime = cputime_to_clock_t(utime); - tmp.tms_stime = cputime_to_clock_t(stime); - tmp.tms_cutime = cputime_to_clock_t(cutime); - tmp.tms_cstime = cputime_to_clock_t(cstime); + + do_sys_times(&tmp); if (copy_to_user(tbuf, &tmp, sizeof(struct tms))) return -EFAULT; } @@ -1445,7 +1435,6 @@ asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *r asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim) { struct rlimit new_rlim, *old_rlim; - unsigned long it_prof_secs; int retval; if (resource >= RLIM_NLIMITS) @@ -1491,18 +1480,7 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim) if (new_rlim.rlim_cur == RLIM_INFINITY) goto out; - it_prof_secs = cputime_to_secs(current->signal->it_prof_expires); - if (it_prof_secs == 0 || new_rlim.rlim_cur <= it_prof_secs) { - unsigned long rlim_cur = new_rlim.rlim_cur; - cputime_t cputime; - - cputime = secs_to_cputime(rlim_cur); - read_lock(&tasklist_lock); - spin_lock_irq(¤t->sighand->siglock); - set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL); - spin_unlock_irq(¤t->sighand->siglock); - read_unlock(&tasklist_lock); - } + update_rlimit_cpu(new_rlim.rlim_cur); out: return 0; } @@ -1540,11 +1518,8 @@ out: * */ -static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r, - cputime_t *utimep, cputime_t *stimep) +static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r) { - *utimep = cputime_add(*utimep, t->utime); - *stimep = cputime_add(*stimep, t->stime); r->ru_nvcsw += t->nvcsw; r->ru_nivcsw += t->nivcsw; r->ru_minflt += t->min_flt; @@ -1558,12 +1533,13 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) struct task_struct *t; unsigned long flags; cputime_t utime, stime; + struct task_cputime cputime; memset((char *) r, 0, sizeof *r); utime = stime = cputime_zero; if (who == RUSAGE_THREAD) { - accumulate_thread_rusage(p, r, &utime, &stime); + accumulate_thread_rusage(p, r); goto out; } @@ -1586,8 +1562,9 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) break; case RUSAGE_SELF: - utime = cputime_add(utime, p->signal->utime); - stime = cputime_add(stime, p->signal->stime); + thread_group_cputime(p, &cputime); + utime = cputime_add(utime, cputime.utime); + stime = cputime_add(stime, cputime.stime); r->ru_nvcsw += p->signal->nvcsw; r->ru_nivcsw += p->signal->nivcsw; r->ru_minflt += p->signal->min_flt; @@ -1596,7 +1573,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) r->ru_oublock += p->signal->oublock; t = p; do { - accumulate_thread_rusage(t, r, &utime, &stime); + accumulate_thread_rusage(t, r); t = next_thread(t); } while (t != p); break; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 03fc6a81ae32..69649783c266 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -75,6 +75,7 @@ #include #include #include +#include #include "avc.h" #include "objsec.h" @@ -2321,13 +2322,7 @@ static void selinux_bprm_post_apply_creds(struct linux_binprm *bprm) initrlim = init_task.signal->rlim+i; rlim->rlim_cur = min(rlim->rlim_max, initrlim->rlim_cur); } - if (current->signal->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) { - /* - * This will cause RLIMIT_CPU calculations - * to be refigured. - */ - current->it_prof_expires = jiffies_to_cputime(1); - } + update_rlimit_cpu(rlim->rlim_cur); } /* Wake up the parent if it is waiting so that it can -- cgit v1.2.3 From 0a8eaa4f9b58759595a1bfe13a1295fdc25ba026 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 14 Sep 2008 17:03:52 +0200 Subject: timers: fix itimer/many thread hang, fix #2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix the UP build: In file included from arch/x86/kernel/asm-offsets_32.c:9, from arch/x86/kernel/asm-offsets.c:3: include/linux/sched.h: In function ‘thread_group_cputime_clone_thread’: include/linux/sched.h:2272: warning: no return statement in function returning non-void include/linux/sched.h: In function ‘thread_group_cputime_account_user’: include/linux/sched.h:2284: error: invalid type argument of ‘->’ (have ‘struct task_cputime’) include/linux/sched.h:2284: error: invalid type argument of ‘->’ (have ‘struct task_cputime’) include/linux/sched.h: In function ‘thread_group_cputime_account_system’: include/linux/sched.h:2291: error: invalid type argument of ‘->’ (have ‘struct task_cputime’) include/linux/sched.h:2291: error: invalid type argument of ‘->’ (have ‘struct task_cputime’) include/linux/sched.h: In function ‘thread_group_cputime_account_exec_runtime’: include/linux/sched.h:2298: error: invalid type argument of ‘->’ (have ‘struct task_cputime’) distcc[14501] ERROR: compile arch/x86/kernel/asm-offsets.c on a/30 failed make[1]: *** [arch/x86/kernel/asm-offsets.s] Error 1 Signed-off-by: Ingo Molnar --- include/linux/sched.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 26d7a5f2d0ba..ed355f02d329 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2251,6 +2251,7 @@ static inline void thread_group_cputime_free(struct signal_struct *sig) static inline int thread_group_cputime_clone_thread(struct task_struct *curr, struct task_struct *tsk) { + return 0; } static inline void thread_group_cputime(struct task_struct *tsk, @@ -2263,21 +2264,21 @@ static inline void thread_group_cputime_account_user( struct thread_group_cputime *tgtimes, cputime_t cputime) { - tgtimes->totals->utime = cputime_add(tgtimes->totals->utime, cputime); + tgtimes->totals.utime = cputime_add(tgtimes->totals.utime, cputime); } static inline void thread_group_cputime_account_system( struct thread_group_cputime *tgtimes, cputime_t cputime) { - tgtimes->totals->stime = cputime_add(tgtimes->totals->stime, cputime); + tgtimes->totals.stime = cputime_add(tgtimes->totals.stime, cputime); } static inline void thread_group_cputime_account_exec_runtime( struct thread_group_cputime *tgtimes, unsigned long long ns) { - tgtimes->totals->sum_exec_runtime += ns; + tgtimes->totals.sum_exec_runtime += ns; } #endif /* CONFIG_SMP */ -- cgit v1.2.3 From 5ce73a4a5a4893a1aa4cdeed1b1a5a6de42c43b6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 14 Sep 2008 17:11:46 +0200 Subject: timers: fix itimer/many thread hang, cleanups Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- kernel/posix-cpu-timers.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index ed355f02d329..7ce8d4e53565 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -430,7 +430,7 @@ struct pacct_struct { * @utime: time spent in user mode, in &cputime_t units * @stime: time spent in kernel mode, in &cputime_t units * @sum_exec_runtime: total time spent on the CPU, in nanoseconds - * + * * This structure groups together three kinds of CPU time that are * tracked for threads and thread groups. Most things considering * CPU time want to group these counts together and treat all three diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index dba1c334c3e8..9a7ea049fcdc 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -94,7 +94,7 @@ void update_rlimit_cpu(unsigned long rlim_new) cputime = secs_to_cputime(rlim_new); if (cputime_eq(current->signal->it_prof_expires, cputime_zero) || - cputime_lt(current->signal->it_prof_expires, cputime)) { + cputime_lt(current->signal->it_prof_expires, cputime)) { spin_lock_irq(¤t->sighand->siglock); set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL); spin_unlock_irq(¤t->sighand->siglock); @@ -1372,9 +1372,9 @@ void run_posix_cpu_timers(struct task_struct *tsk) * tsk->signal is non-NULL; this probably can't happen but cover the * possibility anyway. */ - if (unlikely(!sig) || !fastpath_timer_check(tsk, sig)) { + if (unlikely(!sig) || !fastpath_timer_check(tsk, sig)) return; - } + sighand = lock_task_sighand(tsk, &flags); if (likely(sighand)) { /* -- cgit v1.2.3 From ccbe329bcd87924baed96474ec0a6725e3957897 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Wed, 17 Sep 2008 16:34:03 +0100 Subject: bitmap: add bitmap_copy_le() bitmap_copy_le() copies a bitmap, putting the bits into little-endian order (i.e., each unsigned long word in the bitmap is put into little-endian order). The UWB stack used bitmaps to manage Medium Access Slot availability, and these bitmaps need to be written to the hardware in LE order. Signed-off-by: David Vrabel --- include/linux/bitmap.h | 1 + lib/bitmap.c | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 89781fd48859..5379913aca52 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -130,6 +130,7 @@ extern void bitmap_fold(unsigned long *dst, const unsigned long *orig, extern int bitmap_find_free_region(unsigned long *bitmap, int bits, int order); extern void bitmap_release_region(unsigned long *bitmap, int pos, int order); extern int bitmap_allocate_region(unsigned long *bitmap, int pos, int order); +extern void bitmap_copy_le(void *dst, const unsigned long *src, int nbits); #define BITMAP_LAST_WORD_MASK(nbits) \ ( \ diff --git a/lib/bitmap.c b/lib/bitmap.c index 06fb57c86de0..c2006bfeea41 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -1007,3 +1007,25 @@ int bitmap_allocate_region(unsigned long *bitmap, int pos, int order) return 0; } EXPORT_SYMBOL(bitmap_allocate_region); + +/** + * bitmap_copy_le - copy a bitmap, putting the bits into little-endian order. + * @dst: destination buffer + * @src: bitmap to copy + * @nbits: number of bits in the bitmap + * + * Require nbits % BITS_PER_LONG == 0. + */ +void bitmap_copy_le(void *dst, const unsigned long *src, int nbits) +{ + unsigned long *d = dst; + int i; + + for (i = 0; i < nbits/BITS_PER_LONG; i++) { + if (BITS_PER_LONG == 64) + d[i] = cpu_to_le64(src[i]); + else + d[i] = cpu_to_le32(src[i]); + } +} +EXPORT_SYMBOL(bitmap_copy_le); -- cgit v1.2.3 From 34e95e41f1fd751e33a7eb3fa66594903b81f13d Mon Sep 17 00:00:00 2001 From: Inaky Perez-Gonzalez Date: Wed, 17 Sep 2008 16:34:05 +0100 Subject: uwb: add the uwb include files Signed-off-by: David Vrabel --- include/linux/uwb.h | 761 ++++++++++++++++++++++++++++++++++++++++++ include/linux/uwb/debug-cmd.h | 57 ++++ include/linux/uwb/debug.h | 82 +++++ include/linux/uwb/spec.h | 727 ++++++++++++++++++++++++++++++++++++++++ include/linux/wlp.h | 735 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 2362 insertions(+) create mode 100644 include/linux/uwb.h create mode 100644 include/linux/uwb/debug-cmd.h create mode 100644 include/linux/uwb/debug.h create mode 100644 include/linux/uwb/spec.h create mode 100644 include/linux/wlp.h (limited to 'include') diff --git a/include/linux/uwb.h b/include/linux/uwb.h new file mode 100644 index 000000000000..0cd35937e120 --- /dev/null +++ b/include/linux/uwb.h @@ -0,0 +1,761 @@ +/* + * Ultra Wide Band + * UWB API + * + * Copyright (C) 2005-2006 Intel Corporation + * Inaky Perez-Gonzalez + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + * + * + * FIXME: doc: overview of the API, different parts and pointers + */ + +#ifndef __LINUX__UWB_H__ +#define __LINUX__UWB_H__ + +#include +#include +#include +#include +#include +#include + +struct uwb_dev; +struct uwb_beca_e; +struct uwb_rc; +struct uwb_rsv; +struct uwb_dbg; + +/** + * struct uwb_dev - a UWB Device + * @rc: UWB Radio Controller that discovered the device (kind of its + * parent). + * @bce: a beacon cache entry for this device; or NULL if the device + * is a local radio controller. + * @mac_addr: the EUI-48 address of this device. + * @dev_addr: the current DevAddr used by this device. + * @beacon_slot: the slot number the beacon is using. + * @streams: bitmap of streams allocated to reservations targeted at + * this device. For an RC, this is the streams allocated for + * reservations targeted at DevAddrs. + * + * A UWB device may either by a neighbor or part of a local radio + * controller. + */ +struct uwb_dev { + struct mutex mutex; + struct list_head list_node; + struct device dev; + struct uwb_rc *rc; /* radio controller */ + struct uwb_beca_e *bce; /* Beacon Cache Entry */ + + struct uwb_mac_addr mac_addr; + struct uwb_dev_addr dev_addr; + int beacon_slot; + DECLARE_BITMAP(streams, UWB_NUM_STREAMS); +}; +#define to_uwb_dev(d) container_of(d, struct uwb_dev, dev) + +/** + * UWB HWA/WHCI Radio Control {Command|Event} Block context IDs + * + * RC[CE]Bs have a 'context ID' field that matches the command with + * the event received to confirm it. + * + * Maximum number of context IDs + */ +enum { UWB_RC_CTX_MAX = 256 }; + + +/** Notification chain head for UWB generated events to listeners */ +struct uwb_notifs_chain { + struct list_head list; + struct mutex mutex; +}; + +/** + * struct uwb_mas_bm - a bitmap of all MAS in a superframe + * @bm: a bitmap of length #UWB_NUM_MAS + */ +struct uwb_mas_bm { + DECLARE_BITMAP(bm, UWB_NUM_MAS); +}; + +/** + * uwb_rsv_state - UWB Reservation state. + * + * NONE - reservation is not active (no DRP IE being transmitted). + * + * Owner reservation states: + * + * INITIATED - owner has sent an initial DRP request. + * PENDING - target responded with pending Reason Code. + * MODIFIED - reservation manager is modifying an established + * reservation with a different MAS allocation. + * ESTABLISHED - the reservation has been successfully negotiated. + * + * Target reservation states: + * + * DENIED - request is denied. + * ACCEPTED - request is accepted. + * PENDING - PAL has yet to make a decision to whether to accept or + * deny. + * + * FIXME: further target states TBD. + */ +enum uwb_rsv_state { + UWB_RSV_STATE_NONE, + UWB_RSV_STATE_O_INITIATED, + UWB_RSV_STATE_O_PENDING, + UWB_RSV_STATE_O_MODIFIED, + UWB_RSV_STATE_O_ESTABLISHED, + UWB_RSV_STATE_T_ACCEPTED, + UWB_RSV_STATE_T_DENIED, + UWB_RSV_STATE_T_PENDING, + + UWB_RSV_STATE_LAST, +}; + +enum uwb_rsv_target_type { + UWB_RSV_TARGET_DEV, + UWB_RSV_TARGET_DEVADDR, +}; + +/** + * struct uwb_rsv_target - the target of a reservation. + * + * Reservations unicast and targeted at a single device + * (UWB_RSV_TARGET_DEV); or (e.g., in the case of WUSB) targeted at a + * specific (private) DevAddr (UWB_RSV_TARGET_DEVADDR). + */ +struct uwb_rsv_target { + enum uwb_rsv_target_type type; + union { + struct uwb_dev *dev; + struct uwb_dev_addr devaddr; + }; +}; + +/* + * Number of streams reserved for reservations targeted at DevAddrs. + */ +#define UWB_NUM_GLOBAL_STREAMS 1 + +typedef void (*uwb_rsv_cb_f)(struct uwb_rsv *rsv); + +/** + * struct uwb_rsv - a DRP reservation + * + * Data structure management: + * + * @rc: the radio controller this reservation is for + * (as target or owner) + * @rc_node: a list node for the RC + * @pal_node: a list node for the PAL + * + * Owner and target parameters: + * + * @owner: the UWB device owning this reservation + * @target: the target UWB device + * @type: reservation type + * + * Owner parameters: + * + * @max_mas: maxiumum number of MAS + * @min_mas: minimum number of MAS + * @sparsity: owner selected sparsity + * @is_multicast: true iff multicast + * + * @callback: callback function when the reservation completes + * @pal_priv: private data for the PAL making the reservation + * + * Reservation status: + * + * @status: negotiation status + * @stream: stream index allocated for this reservation + * @mas: reserved MAS + * @drp_ie: the DRP IE + * @ie_valid: true iff the DRP IE matches the reservation parameters + * + * DRP reservations are uniquely identified by the owner, target and + * stream index. However, when using a DevAddr as a target (e.g., for + * a WUSB cluster reservation) the responses may be received from + * devices with different DevAddrs. In this case, reservations are + * uniquely identified by just the stream index. A number of stream + * indexes (UWB_NUM_GLOBAL_STREAMS) are reserved for this. + */ +struct uwb_rsv { + struct uwb_rc *rc; + struct list_head rc_node; + struct list_head pal_node; + + struct uwb_dev *owner; + struct uwb_rsv_target target; + enum uwb_drp_type type; + int max_mas; + int min_mas; + int sparsity; + bool is_multicast; + + uwb_rsv_cb_f callback; + void *pal_priv; + + enum uwb_rsv_state state; + u8 stream; + struct uwb_mas_bm mas; + struct uwb_ie_drp *drp_ie; + bool ie_valid; + struct timer_list timer; + bool expired; +}; + +static const +struct uwb_mas_bm uwb_mas_bm_zero = { .bm = { 0 } }; + +static inline void uwb_mas_bm_copy_le(void *dst, const struct uwb_mas_bm *mas) +{ + bitmap_copy_le(dst, mas->bm, UWB_NUM_MAS); +} + +/** + * struct uwb_drp_avail - a radio controller's view of MAS usage + * @global: MAS unused by neighbors (excluding reservations targetted + * or owned by the local radio controller) or the beaon period + * @local: MAS unused by local established reservations + * @pending: MAS unused by local pending reservations + * @ie: DRP Availability IE to be included in the beacon + * @ie_valid: true iff @ie is valid and does not need to regenerated from + * @global and @local + * + * Each radio controller maintains a view of MAS usage or + * availability. MAS available for a new reservation are determined + * from the intersection of @global, @local, and @pending. + * + * The radio controller must transmit a DRP Availability IE that's the + * intersection of @global and @local. + * + * A set bit indicates the MAS is unused and available. + * + * rc->rsvs_mutex should be held before accessing this data structure. + * + * [ECMA-368] section 17.4.3. + */ +struct uwb_drp_avail { + DECLARE_BITMAP(global, UWB_NUM_MAS); + DECLARE_BITMAP(local, UWB_NUM_MAS); + DECLARE_BITMAP(pending, UWB_NUM_MAS); + struct uwb_ie_drp_avail ie; + bool ie_valid; +}; + + +const char *uwb_rsv_state_str(enum uwb_rsv_state state); +const char *uwb_rsv_type_str(enum uwb_drp_type type); + +struct uwb_rsv *uwb_rsv_create(struct uwb_rc *rc, uwb_rsv_cb_f cb, + void *pal_priv); +void uwb_rsv_destroy(struct uwb_rsv *rsv); + +int uwb_rsv_establish(struct uwb_rsv *rsv); +int uwb_rsv_modify(struct uwb_rsv *rsv, + int max_mas, int min_mas, int sparsity); +void uwb_rsv_terminate(struct uwb_rsv *rsv); + +void uwb_rsv_accept(struct uwb_rsv *rsv, uwb_rsv_cb_f cb, void *pal_priv); + +/** + * Radio Control Interface instance + * + * + * Life cycle rules: those of the UWB Device. + * + * @index: an index number for this radio controller, as used in the + * device name. + * @version: version of protocol supported by this device + * @priv: Backend implementation; rw with uwb_dev.dev.sem taken. + * @cmd: Backend implementation to execute commands; rw and call + * only with uwb_dev.dev.sem taken. + * @reset: Hardware reset of radio controller and any PAL controllers. + * @filter: Backend implementation to manipulate data to and from device + * to be compliant to specification assumed by driver (WHCI + * 0.95). + * + * uwb_dev.dev.mutex is used to execute commands and update + * the corresponding structures; can't use a spinlock + * because rc->cmd() can sleep. + * @ies: This is a dynamically allocated array cacheing the + * IEs (settable by the host) that the beacon of this + * radio controller is currently sending. + * + * In reality, we store here the full command we set to + * the radio controller (which is basically a command + * prefix followed by all the IEs the beacon currently + * contains). This way we don't have to realloc and + * memcpy when setting it. + * + * We set this up in uwb_rc_ie_setup(), where we alloc + * this struct, call get_ie() [so we know which IEs are + * currently being sent, if any]. + * + * @ies_capacity:Amount of space (in bytes) allocated in @ies. The + * amount used is given by sizeof(*ies) plus ies->wIELength + * (which is a little endian quantity all the time). + * @ies_mutex: protect the IE cache + * @dbg: information for the debug interface + */ +struct uwb_rc { + struct uwb_dev uwb_dev; + int index; + u16 version; + + struct module *owner; + void *priv; + int (*start)(struct uwb_rc *rc); + void (*stop)(struct uwb_rc *rc); + int (*cmd)(struct uwb_rc *, const struct uwb_rccb *, size_t); + int (*reset)(struct uwb_rc *rc); + int (*filter_cmd)(struct uwb_rc *, struct uwb_rccb **, size_t *); + int (*filter_event)(struct uwb_rc *, struct uwb_rceb **, const size_t, + size_t *, size_t *); + + spinlock_t neh_lock; /* protects neh_* and ctx_* */ + struct list_head neh_list; /* Open NE handles */ + unsigned long ctx_bm[UWB_RC_CTX_MAX / 8 / sizeof(unsigned long)]; + u8 ctx_roll; + + int beaconing; /* Beaconing state [channel number] */ + int scanning; + enum uwb_scan_type scan_type:3; + unsigned ready:1; + struct uwb_notifs_chain notifs_chain; + + struct uwb_drp_avail drp_avail; + struct list_head reservations; + struct mutex rsvs_mutex; + struct workqueue_struct *rsv_workq; + struct work_struct rsv_update_work; + + struct mutex ies_mutex; + struct uwb_rc_cmd_set_ie *ies; + size_t ies_capacity; + + spinlock_t pal_lock; + struct list_head pals; + + struct uwb_dbg *dbg; +}; + + +/** + * struct uwb_pal - a UWB PAL + * @new_rsv: called when a peer requests a reservation (may be NULL if + * the PAL cannot accept reservation requests). + * + * A Protocol Adaptation Layer (PAL) is a user of the WiMedia UWB + * radio platform (e.g., WUSB, WLP or Bluetooth UWB AMP). + * + * The PALs using a radio controller must register themselves to + * permit the UWB stack to coordinate usage of the radio between the + * various PALs or to allow PALs to response to certain requests from + * peers. + * + * A struct uwb_pal should be embedded in a containing structure + * belonging to the PAL and initialized with uwb_pal_init()). Fields + * should be set appropriately by the PAL before registering the PAL + * with uwb_pal_register(). + */ +struct uwb_pal { + struct list_head node; + + void (*new_rsv)(struct uwb_rsv *rsv); +}; + +void uwb_pal_init(struct uwb_pal *pal); +int uwb_pal_register(struct uwb_rc *rc, struct uwb_pal *pal); +void uwb_pal_unregister(struct uwb_rc *rc, struct uwb_pal *pal); + +/* + * General public API + * + * This API can be used by UWB device drivers or by those implementing + * UWB Radio Controllers + */ +struct uwb_dev *uwb_dev_get_by_devaddr(struct uwb_rc *rc, + const struct uwb_dev_addr *devaddr); +struct uwb_dev *uwb_dev_get_by_rc(struct uwb_dev *, struct uwb_rc *); +static inline void uwb_dev_get(struct uwb_dev *uwb_dev) +{ + get_device(&uwb_dev->dev); +} +static inline void uwb_dev_put(struct uwb_dev *uwb_dev) +{ + put_device(&uwb_dev->dev); +} +struct uwb_dev *uwb_dev_try_get(struct uwb_rc *rc, struct uwb_dev *uwb_dev); + +/** + * Callback function for 'uwb_{dev,rc}_foreach()'. + * + * @dev: Linux device instance + * 'uwb_dev = container_of(dev, struct uwb_dev, dev)' + * @priv: Data passed by the caller to 'uwb_{dev,rc}_foreach()'. + * + * @returns: 0 to continue the iterations, any other val to stop + * iterating and return the value to the caller of + * _foreach(). + */ +typedef int (*uwb_dev_for_each_f)(struct device *dev, void *priv); +int uwb_dev_for_each(struct uwb_rc *rc, uwb_dev_for_each_f func, void *priv); + +struct uwb_rc *uwb_rc_alloc(void); +struct uwb_rc *uwb_rc_get_by_dev(const struct uwb_dev_addr *); +struct uwb_rc *uwb_rc_get_by_grandpa(const struct device *); +void uwb_rc_put(struct uwb_rc *rc); + +typedef void (*uwb_rc_cmd_cb_f)(struct uwb_rc *rc, void *arg, + struct uwb_rceb *reply, ssize_t reply_size); + +int uwb_rc_cmd_async(struct uwb_rc *rc, const char *cmd_name, + struct uwb_rccb *cmd, size_t cmd_size, + u8 expected_type, u16 expected_event, + uwb_rc_cmd_cb_f cb, void *arg); +ssize_t uwb_rc_cmd(struct uwb_rc *rc, const char *cmd_name, + struct uwb_rccb *cmd, size_t cmd_size, + struct uwb_rceb *reply, size_t reply_size); +ssize_t uwb_rc_vcmd(struct uwb_rc *rc, const char *cmd_name, + struct uwb_rccb *cmd, size_t cmd_size, + u8 expected_type, u16 expected_event, + struct uwb_rceb **preply); +ssize_t uwb_rc_get_ie(struct uwb_rc *, struct uwb_rc_evt_get_ie **); +int uwb_bg_joined(struct uwb_rc *rc); + +size_t __uwb_addr_print(char *, size_t, const unsigned char *, int); + +int uwb_rc_dev_addr_set(struct uwb_rc *, const struct uwb_dev_addr *); +int uwb_rc_dev_addr_get(struct uwb_rc *, struct uwb_dev_addr *); +int uwb_rc_mac_addr_set(struct uwb_rc *, const struct uwb_mac_addr *); +int uwb_rc_mac_addr_get(struct uwb_rc *, struct uwb_mac_addr *); +int __uwb_mac_addr_assigned_check(struct device *, void *); +int __uwb_dev_addr_assigned_check(struct device *, void *); + +/* Print in @buf a pretty repr of @addr */ +static inline size_t uwb_dev_addr_print(char *buf, size_t buf_size, + const struct uwb_dev_addr *addr) +{ + return __uwb_addr_print(buf, buf_size, addr->data, 0); +} + +/* Print in @buf a pretty repr of @addr */ +static inline size_t uwb_mac_addr_print(char *buf, size_t buf_size, + const struct uwb_mac_addr *addr) +{ + return __uwb_addr_print(buf, buf_size, addr->data, 1); +} + +/* @returns 0 if device addresses @addr2 and @addr1 are equal */ +static inline int uwb_dev_addr_cmp(const struct uwb_dev_addr *addr1, + const struct uwb_dev_addr *addr2) +{ + return memcmp(addr1, addr2, sizeof(*addr1)); +} + +/* @returns 0 if MAC addresses @addr2 and @addr1 are equal */ +static inline int uwb_mac_addr_cmp(const struct uwb_mac_addr *addr1, + const struct uwb_mac_addr *addr2) +{ + return memcmp(addr1, addr2, sizeof(*addr1)); +} + +/* @returns !0 if a MAC @addr is a broadcast address */ +static inline int uwb_mac_addr_bcast(const struct uwb_mac_addr *addr) +{ + struct uwb_mac_addr bcast = { + .data = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff } + }; + return !uwb_mac_addr_cmp(addr, &bcast); +} + +/* @returns !0 if a MAC @addr is all zeroes*/ +static inline int uwb_mac_addr_unset(const struct uwb_mac_addr *addr) +{ + struct uwb_mac_addr unset = { + .data = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } + }; + return !uwb_mac_addr_cmp(addr, &unset); +} + +/* @returns !0 if the address is in use. */ +static inline unsigned __uwb_dev_addr_assigned(struct uwb_rc *rc, + struct uwb_dev_addr *addr) +{ + return uwb_dev_for_each(rc, __uwb_dev_addr_assigned_check, addr); +} + +/* + * UWB Radio Controller API + * + * This API is used (in addition to the general API) to implement UWB + * Radio Controllers. + */ +void uwb_rc_init(struct uwb_rc *); +int uwb_rc_add(struct uwb_rc *, struct device *dev, void *rc_priv); +void uwb_rc_rm(struct uwb_rc *); +void uwb_rc_neh_grok(struct uwb_rc *, void *, size_t); +void uwb_rc_neh_error(struct uwb_rc *, int); +void uwb_rc_reset_all(struct uwb_rc *rc); + +/** + * uwb_rsv_is_owner - is the owner of this reservation the RC? + * @rsv: the reservation + */ +static inline bool uwb_rsv_is_owner(struct uwb_rsv *rsv) +{ + return rsv->owner == &rsv->rc->uwb_dev; +} + +/** + * Events generated by UWB that can be passed to any listeners + * + * Higher layers can register callback functions with the radio + * controller using uwb_notifs_register(). The radio controller + * maintains a list of all registered handlers and will notify all + * nodes when an event occurs. + */ +enum uwb_notifs { + UWB_NOTIF_BG_JOIN = 0, /* radio controller joined a beacon group */ + UWB_NOTIF_BG_LEAVE = 1, /* radio controller left a beacon group */ + UWB_NOTIF_ONAIR, + UWB_NOTIF_OFFAIR, +}; + +/* Callback function registered with UWB */ +struct uwb_notifs_handler { + struct list_head list_node; + void (*cb)(void *, struct uwb_dev *, enum uwb_notifs); + void *data; +}; + +int uwb_notifs_register(struct uwb_rc *, struct uwb_notifs_handler *); +int uwb_notifs_deregister(struct uwb_rc *, struct uwb_notifs_handler *); + + +/** + * UWB radio controller Event Size Entry (for creating entry tables) + * + * WUSB and WHCI define events and notifications, and they might have + * fixed or variable size. + * + * Each event/notification has a size which is not necessarily known + * in advance based on the event code. As well, vendor specific + * events/notifications will have a size impossible to determine + * unless we know about the device's specific details. + * + * It was way too smart of the spec writers not to think that it would + * be impossible for a generic driver to skip over vendor specific + * events/notifications if there are no LENGTH fields in the HEADER of + * each message...the transaction size cannot be counted on as the + * spec does not forbid to pack more than one event in a single + * transaction. + * + * Thus, we guess sizes with tables (or for events, when you know the + * size ahead of time you can use uwb_rc_neh_extra_size*()). We + * register tables with the known events and their sizes, and then we + * traverse those tables. For those with variable length, we provide a + * way to lookup the size inside the event/notification's + * payload. This allows device-specific event size tables to be + * registered. + * + * @size: Size of the payload + * + * @offset: if != 0, at offset @offset-1 starts a field with a length + * that has to be added to @size. The format of the field is + * given by @type. + * + * @type: Type and length of the offset field. Most common is LE 16 + * bits (that's why that is zero); others are there mostly to + * cover for bugs and weirdos. + */ +struct uwb_est_entry { + size_t size; + unsigned offset; + enum { UWB_EST_16 = 0, UWB_EST_8 = 1 } type; +}; + +int uwb_est_register(u8 type, u8 code_high, u16 vendor, u16 product, + const struct uwb_est_entry *, size_t entries); +int uwb_est_unregister(u8 type, u8 code_high, u16 vendor, u16 product, + const struct uwb_est_entry *, size_t entries); +ssize_t uwb_est_find_size(struct uwb_rc *rc, const struct uwb_rceb *rceb, + size_t len); + +/* -- Misc */ + +enum { + EDC_MAX_ERRORS = 10, + EDC_ERROR_TIMEFRAME = HZ, +}; + +/* error density counter */ +struct edc { + unsigned long timestart; + u16 errorcount; +}; + +static inline +void edc_init(struct edc *edc) +{ + edc->timestart = jiffies; +} + +/* Called when an error occured. + * This is way to determine if the number of acceptable errors per time + * period has been exceeded. It is not accurate as there are cases in which + * this scheme will not work, for example if there are periodic occurences + * of errors that straddle updates to the start time. This scheme is + * sufficient for our usage. + * + * @returns 1 if maximum acceptable errors per timeframe has been exceeded. + */ +static inline int edc_inc(struct edc *err_hist, u16 max_err, u16 timeframe) +{ + unsigned long now; + + now = jiffies; + if (now - err_hist->timestart > timeframe) { + err_hist->errorcount = 1; + err_hist->timestart = now; + } else if (++err_hist->errorcount > max_err) { + err_hist->errorcount = 0; + err_hist->timestart = now; + return 1; + } + return 0; +} + + +/* Information Element handling */ + +/* For representing the state of writing to a buffer when iterating */ +struct uwb_buf_ctx { + char *buf; + size_t bytes, size; +}; + +typedef int (*uwb_ie_f)(struct uwb_dev *, const struct uwb_ie_hdr *, + size_t, void *); +struct uwb_ie_hdr *uwb_ie_next(void **ptr, size_t *len); +ssize_t uwb_ie_for_each(struct uwb_dev *uwb_dev, uwb_ie_f fn, void *data, + const void *buf, size_t size); +int uwb_ie_dump_hex(struct uwb_dev *, const struct uwb_ie_hdr *, + size_t, void *); +int uwb_rc_set_ie(struct uwb_rc *, struct uwb_rc_cmd_set_ie *); +struct uwb_ie_hdr *uwb_ie_next(void **ptr, size_t *len); + + +/* + * Transmission statistics + * + * UWB uses LQI and RSSI (one byte values) for reporting radio signal + * strength and line quality indication. We do quick and dirty + * averages of those. They are signed values, btw. + * + * For 8 bit quantities, we keep the min, the max, an accumulator + * (@sigma) and a # of samples. When @samples gets to 255, we compute + * the average (@sigma / @samples), place it in @sigma and reset + * @samples to 1 (so we use it as the first sample). + * + * Now, statistically speaking, probably I am kicking the kidneys of + * some books I have in my shelves collecting dust, but I just want to + * get an approx, not the Nobel. + * + * LOCKING: there is no locking per se, but we try to keep a lockless + * schema. Only _add_samples() modifies the values--as long as you + * have other locking on top that makes sure that no two calls of + * _add_sample() happen at the same time, then we are fine. Now, for + * resetting the values we just set @samples to 0 and that makes the + * next _add_sample() to start with defaults. Reading the values in + * _show() currently can race, so you need to make sure the calls are + * under the same lock that protects calls to _add_sample(). FIXME: + * currently unlocked (It is not ultraprecise but does the trick. Bite + * me). + */ +struct stats { + s8 min, max; + s16 sigma; + atomic_t samples; +}; + +static inline +void stats_init(struct stats *stats) +{ + atomic_set(&stats->samples, 0); + wmb(); +} + +static inline +void stats_add_sample(struct stats *stats, s8 sample) +{ + s8 min, max; + s16 sigma; + unsigned samples = atomic_read(&stats->samples); + if (samples == 0) { /* it was zero before, so we initialize */ + min = 127; + max = -128; + sigma = 0; + } else { + min = stats->min; + max = stats->max; + sigma = stats->sigma; + } + + if (sample < min) /* compute new values */ + min = sample; + else if (sample > max) + max = sample; + sigma += sample; + + stats->min = min; /* commit */ + stats->max = max; + stats->sigma = sigma; + if (atomic_add_return(1, &stats->samples) > 255) { + /* wrapped around! reset */ + stats->sigma = sigma / 256; + atomic_set(&stats->samples, 1); + } +} + +static inline ssize_t stats_show(struct stats *stats, char *buf) +{ + int min, max, avg; + int samples = atomic_read(&stats->samples); + if (samples == 0) + min = max = avg = 0; + else { + min = stats->min; + max = stats->max; + avg = stats->sigma / samples; + } + return scnprintf(buf, PAGE_SIZE, "%d %d %d\n", min, max, avg); +} + +static inline ssize_t stats_store(struct stats *stats, const char *buf, + size_t size) +{ + stats_init(stats); + return size; +} + +#endif /* #ifndef __LINUX__UWB_H__ */ diff --git a/include/linux/uwb/debug-cmd.h b/include/linux/uwb/debug-cmd.h new file mode 100644 index 000000000000..1141f41bab5c --- /dev/null +++ b/include/linux/uwb/debug-cmd.h @@ -0,0 +1,57 @@ +/* + * Ultra Wide Band + * Debug interface commands + * + * Copyright (C) 2008 Cambridge Silicon Radio Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef __LINUX__UWB__DEBUG_CMD_H__ +#define __LINUX__UWB__DEBUG_CMD_H__ + +#include + +/* + * Debug interface commands + * + * UWB_DBG_CMD_RSV_ESTABLISH: Establish a new unicast reservation. + * + * UWB_DBG_CMD_RSV_TERMINATE: Terminate the Nth reservation. + */ + +enum uwb_dbg_cmd_type { + UWB_DBG_CMD_RSV_ESTABLISH = 1, + UWB_DBG_CMD_RSV_TERMINATE = 2, +}; + +struct uwb_dbg_cmd_rsv_establish { + __u8 target[6]; + __u8 type; + __u16 max_mas; + __u16 min_mas; + __u8 sparsity; +}; + +struct uwb_dbg_cmd_rsv_terminate { + int index; +}; + +struct uwb_dbg_cmd { + __u32 type; + union { + struct uwb_dbg_cmd_rsv_establish rsv_establish; + struct uwb_dbg_cmd_rsv_terminate rsv_terminate; + }; +}; + +#endif /* #ifndef __LINUX__UWB__DEBUG_CMD_H__ */ diff --git a/include/linux/uwb/debug.h b/include/linux/uwb/debug.h new file mode 100644 index 000000000000..a86a73fe303f --- /dev/null +++ b/include/linux/uwb/debug.h @@ -0,0 +1,82 @@ +/* + * Ultra Wide Band + * Debug Support + * + * Copyright (C) 2005-2006 Intel Corporation + * Inaky Perez-Gonzalez + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + * + * + * FIXME: doc + * Invoke like: + * + * #define D_LOCAL 4 + * #include + * + * At the end of your include files. + */ +#include + +struct device; +extern void dump_bytes(struct device *dev, const void *_buf, size_t rsize); + +/* Master debug switch; !0 enables, 0 disables */ +#define D_MASTER (!0) + +/* Local (per-file) debug switch; #define before #including */ +#ifndef D_LOCAL +#define D_LOCAL 0 +#endif + +#undef __d_printf +#undef d_fnstart +#undef d_fnend +#undef d_printf +#undef d_dump + +#define __d_printf(l, _tag, _dev, f, a...) \ +do { \ + struct device *__dev = (_dev); \ + if (D_MASTER && D_LOCAL >= (l)) { \ + char __head[64] = ""; \ + if (_dev != NULL) { \ + if ((unsigned long)__dev < 4096) \ + printk(KERN_ERR "E: Corrupt dev %p\n", \ + __dev); \ + else \ + snprintf(__head, sizeof(__head), \ + "%s %s: ", \ + dev_driver_string(__dev), \ + __dev->bus_id); \ + } \ + printk(KERN_ERR "%s%s" _tag ": " f, __head, \ + __func__, ## a); \ + } \ +} while (0 && _dev) + +#define d_fnstart(l, _dev, f, a...) \ + __d_printf(l, " FNSTART", _dev, f, ## a) +#define d_fnend(l, _dev, f, a...) \ + __d_printf(l, " FNEND", _dev, f, ## a) +#define d_printf(l, _dev, f, a...) \ + __d_printf(l, "", _dev, f, ## a) +#define d_dump(l, _dev, ptr, size) \ +do { \ + struct device *__dev = _dev; \ + if (D_MASTER && D_LOCAL >= (l)) \ + dump_bytes(__dev, ptr, size); \ +} while (0 && _dev) +#define d_test(l) (D_MASTER && D_LOCAL >= (l)) diff --git a/include/linux/uwb/spec.h b/include/linux/uwb/spec.h new file mode 100644 index 000000000000..198c15f8e251 --- /dev/null +++ b/include/linux/uwb/spec.h @@ -0,0 +1,727 @@ +/* + * Ultra Wide Band + * UWB Standard definitions + * + * Copyright (C) 2005-2006 Intel Corporation + * Inaky Perez-Gonzalez + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + * + * + * All these definitions are based on the ECMA-368 standard. + * + * Note all definitions are Little Endian in the wire, and we will + * convert them to host order before operating on the bitfields (that + * yes, we use extensively). + */ + +#ifndef __LINUX__UWB_SPEC_H__ +#define __LINUX__UWB_SPEC_H__ + +#include +#include + +#define i1480_FW 0x00000303 +/* #define i1480_FW 0x00000302 */ + +/** + * Number of Medium Access Slots in a superframe. + * + * UWB divides time in SuperFrames, each one divided in 256 pieces, or + * Medium Access Slots. See MBOA MAC[5.4.5] for details. The MAS is the + * basic bandwidth allocation unit in UWB. + */ +enum { UWB_NUM_MAS = 256 }; + +/** + * Number of Zones in superframe. + * + * UWB divides the superframe into zones with numbering starting from BPST. + * See MBOA MAC[16.8.6] + */ +enum { UWB_NUM_ZONES = 16 }; + +/* + * Number of MAS in a zone. + */ +#define UWB_MAS_PER_ZONE (UWB_NUM_MAS / UWB_NUM_ZONES) + +/* + * Number of streams per DRP reservation between a pair of devices. + * + * [ECMA-368] section 16.8.6. + */ +enum { UWB_NUM_STREAMS = 8 }; + +/* + * mMasLength + * + * The length of a MAS in microseconds. + * + * [ECMA-368] section 17.16. + */ +enum { UWB_MAS_LENGTH_US = 256 }; + +/* + * mBeaconSlotLength + * + * The length of the beacon slot in microseconds. + * + * [ECMA-368] section 17.16 + */ +enum { UWB_BEACON_SLOT_LENGTH_US = 85 }; + +/* + * mMaxLostBeacons + * + * The number beacons missing in consecutive superframes before a + * device can be considered as unreachable. + * + * [ECMA-368] section 17.16 + */ +enum { UWB_MAX_LOST_BEACONS = 3 }; + +/* + * Length of a superframe in microseconds. + */ +#define UWB_SUPERFRAME_LENGTH_US (UWB_MAS_LENGTH_US * UWB_NUM_MAS) + +/** + * UWB MAC address + * + * It is *imperative* that this struct is exactly 6 packed bytes (as + * it is also used to define headers sent down and up the wire/radio). + */ +struct uwb_mac_addr { + u8 data[6]; +} __attribute__((packed)); + + +/** + * UWB device address + * + * It is *imperative* that this struct is exactly 6 packed bytes (as + * it is also used to define headers sent down and up the wire/radio). + */ +struct uwb_dev_addr { + u8 data[2]; +} __attribute__((packed)); + + +/** + * Types of UWB addresses + * + * Order matters (by size). + */ +enum uwb_addr_type { + UWB_ADDR_DEV = 0, + UWB_ADDR_MAC = 1, +}; + + +/** Size of a char buffer for printing a MAC/device address */ +enum { UWB_ADDR_STRSIZE = 32 }; + + +/** UWB WiMedia protocol IDs. */ +enum uwb_prid { + UWB_PRID_WLP_RESERVED = 0x0000, + UWB_PRID_WLP = 0x0001, + UWB_PRID_WUSB_BOT = 0x0010, + UWB_PRID_WUSB = 0x0010, + UWB_PRID_WUSB_TOP = 0x001F, +}; + + +/** PHY Rate (MBOA MAC[7.8.12, Table 61]) */ +enum uwb_phy_rate { + UWB_PHY_RATE_53 = 0, + UWB_PHY_RATE_80, + UWB_PHY_RATE_106, + UWB_PHY_RATE_160, + UWB_PHY_RATE_200, + UWB_PHY_RATE_320, + UWB_PHY_RATE_400, + UWB_PHY_RATE_480, + UWB_PHY_RATE_INVALID +}; + + +/** + * Different ways to scan (MBOA MAC[6.2.2, Table 8], WUSB[Table 8-78]) + */ +enum uwb_scan_type { + UWB_SCAN_ONLY = 0, + UWB_SCAN_OUTSIDE_BP, + UWB_SCAN_WHILE_INACTIVE, + UWB_SCAN_DISABLED, + UWB_SCAN_ONLY_STARTTIME, + UWB_SCAN_TOP +}; + + +/** ACK Policy types (MBOA MAC[7.2.1.3]) */ +enum uwb_ack_pol { + UWB_ACK_NO = 0, + UWB_ACK_INM = 1, + UWB_ACK_B = 2, + UWB_ACK_B_REQ = 3, +}; + + +/** DRP reservation types ([ECMA-368 table 106) */ +enum uwb_drp_type { + UWB_DRP_TYPE_ALIEN_BP = 0, + UWB_DRP_TYPE_HARD, + UWB_DRP_TYPE_SOFT, + UWB_DRP_TYPE_PRIVATE, + UWB_DRP_TYPE_PCA, +}; + + +/** DRP Reason Codes ([ECMA-368] table 107) */ +enum uwb_drp_reason { + UWB_DRP_REASON_ACCEPTED = 0, + UWB_DRP_REASON_CONFLICT, + UWB_DRP_REASON_PENDING, + UWB_DRP_REASON_DENIED, + UWB_DRP_REASON_MODIFIED, +}; + +/** + * DRP Notification Reason Codes (WHCI 0.95 [3.1.4.9]) + */ +enum uwb_drp_notif_reason { + UWB_DRP_NOTIF_DRP_IE_RCVD = 0, + UWB_DRP_NOTIF_CONFLICT, + UWB_DRP_NOTIF_TERMINATE, +}; + + +/** Allocation of MAS slots in a DRP request MBOA MAC[7.8.7] */ +struct uwb_drp_alloc { + __le16 zone_bm; + __le16 mas_bm; +} __attribute__((packed)); + + +/** General MAC Header format (ECMA-368[16.2]) */ +struct uwb_mac_frame_hdr { + __le16 Frame_Control; + struct uwb_dev_addr DestAddr; + struct uwb_dev_addr SrcAddr; + __le16 Sequence_Control; + __le16 Access_Information; +} __attribute__((packed)); + + +/** + * uwb_beacon_frame - a beacon frame including MAC headers + * + * [ECMA] section 16.3. + */ +struct uwb_beacon_frame { + struct uwb_mac_frame_hdr hdr; + struct uwb_mac_addr Device_Identifier; /* may be a NULL EUI-48 */ + u8 Beacon_Slot_Number; + u8 Device_Control; + u8 IEData[]; +} __attribute__((packed)); + + +/** Information Element codes (MBOA MAC[T54]) */ +enum uwb_ie { + UWB_PCA_AVAILABILITY = 2, + UWB_IE_DRP_AVAILABILITY = 8, + UWB_IE_DRP = 9, + UWB_BP_SWITCH_IE = 11, + UWB_MAC_CAPABILITIES_IE = 12, + UWB_PHY_CAPABILITIES_IE = 13, + UWB_APP_SPEC_PROBE_IE = 15, + UWB_IDENTIFICATION_IE = 19, + UWB_MASTER_KEY_ID_IE = 20, + UWB_IE_WLP = 250, /* WiMedia Logical Link Control Protocol WLP 0.99 */ + UWB_APP_SPEC_IE = 255, +}; + + +/** + * Header common to all Information Elements (IEs) + */ +struct uwb_ie_hdr { + u8 element_id; /* enum uwb_ie */ + u8 length; +} __attribute__((packed)); + + +/** Dynamic Reservation Protocol IE (MBOA MAC[7.8.6]) */ +struct uwb_ie_drp { + struct uwb_ie_hdr hdr; + __le16 drp_control; + struct uwb_dev_addr dev_addr; + struct uwb_drp_alloc allocs[]; +} __attribute__((packed)); + +static inline int uwb_ie_drp_type(struct uwb_ie_drp *ie) +{ + return (le16_to_cpu(ie->drp_control) >> 0) & 0x7; +} + +static inline int uwb_ie_drp_stream_index(struct uwb_ie_drp *ie) +{ + return (le16_to_cpu(ie->drp_control) >> 3) & 0x7; +} + +static inline int uwb_ie_drp_reason_code(struct uwb_ie_drp *ie) +{ + return (le16_to_cpu(ie->drp_control) >> 6) & 0x7; +} + +static inline int uwb_ie_drp_status(struct uwb_ie_drp *ie) +{ + return (le16_to_cpu(ie->drp_control) >> 9) & 0x1; +} + +static inline int uwb_ie_drp_owner(struct uwb_ie_drp *ie) +{ + return (le16_to_cpu(ie->drp_control) >> 10) & 0x1; +} + +static inline int uwb_ie_drp_tiebreaker(struct uwb_ie_drp *ie) +{ + return (le16_to_cpu(ie->drp_control) >> 11) & 0x1; +} + +static inline int uwb_ie_drp_unsafe(struct uwb_ie_drp *ie) +{ + return (le16_to_cpu(ie->drp_control) >> 12) & 0x1; +} + +static inline void uwb_ie_drp_set_type(struct uwb_ie_drp *ie, enum uwb_drp_type type) +{ + u16 drp_control = le16_to_cpu(ie->drp_control); + drp_control = (drp_control & ~(0x7 << 0)) | (type << 0); + ie->drp_control = cpu_to_le16(drp_control); +} + +static inline void uwb_ie_drp_set_stream_index(struct uwb_ie_drp *ie, int stream_index) +{ + u16 drp_control = le16_to_cpu(ie->drp_control); + drp_control = (drp_control & ~(0x7 << 3)) | (stream_index << 3); + ie->drp_control = cpu_to_le16(drp_control); +} + +static inline void uwb_ie_drp_set_reason_code(struct uwb_ie_drp *ie, + enum uwb_drp_reason reason_code) +{ + u16 drp_control = le16_to_cpu(ie->drp_control); + drp_control = (ie->drp_control & ~(0x7 << 6)) | (reason_code << 6); + ie->drp_control = cpu_to_le16(drp_control); +} + +static inline void uwb_ie_drp_set_status(struct uwb_ie_drp *ie, int status) +{ + u16 drp_control = le16_to_cpu(ie->drp_control); + drp_control = (drp_control & ~(0x1 << 9)) | (status << 9); + ie->drp_control = cpu_to_le16(drp_control); +} + +static inline void uwb_ie_drp_set_owner(struct uwb_ie_drp *ie, int owner) +{ + u16 drp_control = le16_to_cpu(ie->drp_control); + drp_control = (drp_control & ~(0x1 << 10)) | (owner << 10); + ie->drp_control = cpu_to_le16(drp_control); +} + +static inline void uwb_ie_drp_set_tiebreaker(struct uwb_ie_drp *ie, int tiebreaker) +{ + u16 drp_control = le16_to_cpu(ie->drp_control); + drp_control = (drp_control & ~(0x1 << 11)) | (tiebreaker << 11); + ie->drp_control = cpu_to_le16(drp_control); +} + +static inline void uwb_ie_drp_set_unsafe(struct uwb_ie_drp *ie, int unsafe) +{ + u16 drp_control = le16_to_cpu(ie->drp_control); + drp_control = (drp_control & ~(0x1 << 12)) | (unsafe << 12); + ie->drp_control = cpu_to_le16(drp_control); +} + +/** Dynamic Reservation Protocol IE (MBOA MAC[7.8.7]) */ +struct uwb_ie_drp_avail { + struct uwb_ie_hdr hdr; + DECLARE_BITMAP(bmp, UWB_NUM_MAS); +} __attribute__((packed)); + +/** + * The Vendor ID is set to an OUI that indicates the vendor of the device. + * ECMA-368 [16.8.10] + */ +struct uwb_vendor_id { + u8 data[3]; +} __attribute__((packed)); + +/** + * The device type ID + * FIXME: clarify what this means + * ECMA-368 [16.8.10] + */ +struct uwb_device_type_id { + u8 data[3]; +} __attribute__((packed)); + + +/** + * UWB device information types + * ECMA-368 [16.8.10] + */ +enum uwb_dev_info_type { + UWB_DEV_INFO_VENDOR_ID = 0, + UWB_DEV_INFO_VENDOR_TYPE, + UWB_DEV_INFO_NAME, +}; + +/** + * UWB device information found in Identification IE + * ECMA-368 [16.8.10] + */ +struct uwb_dev_info { + u8 type; /* enum uwb_dev_info_type */ + u8 length; + u8 data[]; +} __attribute__((packed)); + +/** + * UWB Identification IE + * ECMA-368 [16.8.10] + */ +struct uwb_identification_ie { + struct uwb_ie_hdr hdr; + struct uwb_dev_info info[]; +} __attribute__((packed)); + +/* + * UWB Radio Controller + * + * These definitions are common to the Radio Control layers as + * exported by the WUSB1.0 HWA and WHCI interfaces. + */ + +/** Radio Control Command Block (WUSB1.0[Table 8-65] and WHCI 0.95) */ +struct uwb_rccb { + u8 bCommandType; /* enum hwa_cet */ + __le16 wCommand; /* Command code */ + u8 bCommandContext; /* Context ID */ +} __attribute__((packed)); + + +/** Radio Control Event Block (WUSB[table 8-66], WHCI 0.95) */ +struct uwb_rceb { + u8 bEventType; /* enum hwa_cet */ + __le16 wEvent; /* Event code */ + u8 bEventContext; /* Context ID */ +} __attribute__((packed)); + + +enum { + UWB_RC_CET_GENERAL = 0, /* General Command/Event type */ + UWB_RC_CET_EX_TYPE_1 = 1, /* Extended Type 1 Command/Event type */ +}; + +/* Commands to the radio controller */ +enum uwb_rc_cmd { + UWB_RC_CMD_CHANNEL_CHANGE = 16, + UWB_RC_CMD_DEV_ADDR_MGMT = 17, /* Device Address Management */ + UWB_RC_CMD_GET_IE = 18, /* GET Information Elements */ + UWB_RC_CMD_RESET = 19, + UWB_RC_CMD_SCAN = 20, /* Scan management */ + UWB_RC_CMD_SET_BEACON_FILTER = 21, + UWB_RC_CMD_SET_DRP_IE = 22, /* Dynamic Reservation Protocol IEs */ + UWB_RC_CMD_SET_IE = 23, /* Information Element management */ + UWB_RC_CMD_SET_NOTIFICATION_FILTER = 24, + UWB_RC_CMD_SET_TX_POWER = 25, + UWB_RC_CMD_SLEEP = 26, + UWB_RC_CMD_START_BEACON = 27, + UWB_RC_CMD_STOP_BEACON = 28, + UWB_RC_CMD_BP_MERGE = 29, + UWB_RC_CMD_SEND_COMMAND_FRAME = 30, + UWB_RC_CMD_SET_ASIE_NOTIF = 31, +}; + +/* Notifications from the radio controller */ +enum uwb_rc_evt { + UWB_RC_EVT_IE_RCV = 0, + UWB_RC_EVT_BEACON = 1, + UWB_RC_EVT_BEACON_SIZE = 2, + UWB_RC_EVT_BPOIE_CHANGE = 3, + UWB_RC_EVT_BP_SLOT_CHANGE = 4, + UWB_RC_EVT_BP_SWITCH_IE_RCV = 5, + UWB_RC_EVT_DEV_ADDR_CONFLICT = 6, + UWB_RC_EVT_DRP_AVAIL = 7, + UWB_RC_EVT_DRP = 8, + UWB_RC_EVT_BP_SWITCH_STATUS = 9, + UWB_RC_EVT_CMD_FRAME_RCV = 10, + UWB_RC_EVT_CHANNEL_CHANGE_IE_RCV = 11, + /* Events (command responses) use the same code as the command */ + UWB_RC_EVT_UNKNOWN_CMD_RCV = 65535, +}; + +enum uwb_rc_extended_type_1_cmd { + UWB_RC_SET_DAA_ENERGY_MASK = 32, + UWB_RC_SET_NOTIFICATION_FILTER_EX = 33, +}; + +enum uwb_rc_extended_type_1_evt { + UWB_RC_DAA_ENERGY_DETECTED = 0, +}; + +/* Radio Control Result Code. [WHCI] table 3-3. */ +enum { + UWB_RC_RES_SUCCESS = 0, + UWB_RC_RES_FAIL, + UWB_RC_RES_FAIL_HARDWARE, + UWB_RC_RES_FAIL_NO_SLOTS, + UWB_RC_RES_FAIL_BEACON_TOO_LARGE, + UWB_RC_RES_FAIL_INVALID_PARAMETER, + UWB_RC_RES_FAIL_UNSUPPORTED_PWR_LEVEL, + UWB_RC_RES_FAIL_INVALID_IE_DATA, + UWB_RC_RES_FAIL_BEACON_SIZE_EXCEEDED, + UWB_RC_RES_FAIL_CANCELLED, + UWB_RC_RES_FAIL_INVALID_STATE, + UWB_RC_RES_FAIL_INVALID_SIZE, + UWB_RC_RES_FAIL_ACK_NOT_RECEIVED, + UWB_RC_RES_FAIL_NO_MORE_ASIE_NOTIF, + UWB_RC_RES_FAIL_TIME_OUT = 255, +}; + +/* Confirm event. [WHCI] section 3.1.3.1 etc. */ +struct uwb_rc_evt_confirm { + struct uwb_rceb rceb; + u8 bResultCode; +} __attribute__((packed)); + +/* Device Address Management event. [WHCI] section 3.1.3.2. */ +struct uwb_rc_evt_dev_addr_mgmt { + struct uwb_rceb rceb; + u8 baAddr[6]; + u8 bResultCode; +} __attribute__((packed)); + + +/* Get IE Event. [WHCI] section 3.1.3.3. */ +struct uwb_rc_evt_get_ie { + struct uwb_rceb rceb; + __le16 wIELength; + u8 IEData[]; +} __attribute__((packed)); + +/* Set DRP IE Event. [WHCI] section 3.1.3.7. */ +struct uwb_rc_evt_set_drp_ie { + struct uwb_rceb rceb; + __le16 wRemainingSpace; + u8 bResultCode; +} __attribute__((packed)); + +/* Set IE Event. [WHCI] section 3.1.3.8. */ +struct uwb_rc_evt_set_ie { + struct uwb_rceb rceb; + __le16 RemainingSpace; + u8 bResultCode; +} __attribute__((packed)); + +/* Scan command. [WHCI] 3.1.3.5. */ +struct uwb_rc_cmd_scan { + struct uwb_rccb rccb; + u8 bChannelNumber; + u8 bScanState; + __le16 wStartTime; +} __attribute__((packed)); + +/* Set DRP IE command. [WHCI] section 3.1.3.7. */ +struct uwb_rc_cmd_set_drp_ie { + struct uwb_rccb rccb; + __le16 wIELength; + struct uwb_ie_drp IEData[]; +} __attribute__((packed)); + +/* Set IE command. [WHCI] section 3.1.3.8. */ +struct uwb_rc_cmd_set_ie { + struct uwb_rccb rccb; + __le16 wIELength; + u8 IEData[]; +} __attribute__((packed)); + +/* Set DAA Energy Mask event. [WHCI 0.96] section 3.1.3.17. */ +struct uwb_rc_evt_set_daa_energy_mask { + struct uwb_rceb rceb; + __le16 wLength; + u8 result; +} __attribute__((packed)); + +/* Set Notification Filter Extended event. [WHCI 0.96] section 3.1.3.18. */ +struct uwb_rc_evt_set_notification_filter_ex { + struct uwb_rceb rceb; + __le16 wLength; + u8 result; +} __attribute__((packed)); + +/* IE Received notification. [WHCI] section 3.1.4.1. */ +struct uwb_rc_evt_ie_rcv { + struct uwb_rceb rceb; + struct uwb_dev_addr SrcAddr; + __le16 wIELength; + u8 IEData[]; +} __attribute__((packed)); + +/* Type of the received beacon. [WHCI] section 3.1.4.2. */ +enum uwb_rc_beacon_type { + UWB_RC_BEACON_TYPE_SCAN = 0, + UWB_RC_BEACON_TYPE_NEIGHBOR, + UWB_RC_BEACON_TYPE_OL_ALIEN, + UWB_RC_BEACON_TYPE_NOL_ALIEN, +}; + +/* Beacon received notification. [WHCI] 3.1.4.2. */ +struct uwb_rc_evt_beacon { + struct uwb_rceb rceb; + u8 bChannelNumber; + u8 bBeaconType; + __le16 wBPSTOffset; + u8 bLQI; + u8 bRSSI; + __le16 wBeaconInfoLength; + u8 BeaconInfo[]; +} __attribute__((packed)); + + +/* Beacon Size Change notification. [WHCI] section 3.1.4.3 */ +struct uwb_rc_evt_beacon_size { + struct uwb_rceb rceb; + __le16 wNewBeaconSize; +} __attribute__((packed)); + + +/* BPOIE Change notification. [WHCI] section 3.1.4.4. */ +struct uwb_rc_evt_bpoie_change { + struct uwb_rceb rceb; + __le16 wBPOIELength; + u8 BPOIE[]; +} __attribute__((packed)); + + +/* Beacon Slot Change notification. [WHCI] section 3.1.4.5. */ +struct uwb_rc_evt_bp_slot_change { + struct uwb_rceb rceb; + u8 slot_info; +} __attribute__((packed)); + +static inline int uwb_rc_evt_bp_slot_change_slot_num( + const struct uwb_rc_evt_bp_slot_change *evt) +{ + return evt->slot_info & 0x7f; +} + +static inline int uwb_rc_evt_bp_slot_change_no_slot( + const struct uwb_rc_evt_bp_slot_change *evt) +{ + return (evt->slot_info & 0x80) >> 7; +} + +/* BP Switch IE Received notification. [WHCI] section 3.1.4.6. */ +struct uwb_rc_evt_bp_switch_ie_rcv { + struct uwb_rceb rceb; + struct uwb_dev_addr wSrcAddr; + __le16 wIELength; + u8 IEData[]; +} __attribute__((packed)); + +/* DevAddr Conflict notification. [WHCI] section 3.1.4.7. */ +struct uwb_rc_evt_dev_addr_conflict { + struct uwb_rceb rceb; +} __attribute__((packed)); + +/* DRP notification. [WHCI] section 3.1.4.9. */ +struct uwb_rc_evt_drp { + struct uwb_rceb rceb; + struct uwb_dev_addr src_addr; + u8 reason; + u8 beacon_slot_number; + __le16 ie_length; + u8 ie_data[]; +} __attribute__((packed)); + +static inline enum uwb_drp_notif_reason uwb_rc_evt_drp_reason(struct uwb_rc_evt_drp *evt) +{ + return evt->reason & 0x0f; +} + + +/* DRP Availability Change notification. [WHCI] section 3.1.4.8. */ +struct uwb_rc_evt_drp_avail { + struct uwb_rceb rceb; + DECLARE_BITMAP(bmp, UWB_NUM_MAS); +} __attribute__((packed)); + +/* BP switch status notification. [WHCI] section 3.1.4.10. */ +struct uwb_rc_evt_bp_switch_status { + struct uwb_rceb rceb; + u8 status; + u8 slot_offset; + __le16 bpst_offset; + u8 move_countdown; +} __attribute__((packed)); + +/* Command Frame Received notification. [WHCI] section 3.1.4.11. */ +struct uwb_rc_evt_cmd_frame_rcv { + struct uwb_rceb rceb; + __le16 receive_time; + struct uwb_dev_addr wSrcAddr; + struct uwb_dev_addr wDstAddr; + __le16 control; + __le16 reserved; + __le16 dataLength; + u8 data[]; +} __attribute__((packed)); + +/* Channel Change IE Received notification. [WHCI] section 3.1.4.12. */ +struct uwb_rc_evt_channel_change_ie_rcv { + struct uwb_rceb rceb; + struct uwb_dev_addr wSrcAddr; + __le16 wIELength; + u8 IEData[]; +} __attribute__((packed)); + +/* DAA Energy Detected notification. [WHCI 0.96] section 3.1.4.14. */ +struct uwb_rc_evt_daa_energy_detected { + struct uwb_rceb rceb; + __le16 wLength; + u8 bandID; + u8 reserved; + u8 toneBmp[16]; +} __attribute__((packed)); + + +/** + * Radio Control Interface Class Descriptor + * + * WUSB 1.0 [8.6.1.2] + */ +struct uwb_rc_control_intf_class_desc { + u8 bLength; + u8 bDescriptorType; + __le16 bcdRCIVersion; +} __attribute__((packed)); + +#endif /* #ifndef __LINUX__UWB_SPEC_H__ */ diff --git a/include/linux/wlp.h b/include/linux/wlp.h new file mode 100644 index 000000000000..033545e145c7 --- /dev/null +++ b/include/linux/wlp.h @@ -0,0 +1,735 @@ +/* + * WiMedia Logical Link Control Protocol (WLP) + * + * Copyright (C) 2005-2006 Intel Corporation + * Reinette Chatre + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + * + * + * FIXME: docs + * + * - Does not (yet) include support for WLP control frames + * WLP Draft 0.99 [6.5]. + * + * A visual representation of the data structures. + * + * wssidB wssidB + * ^ ^ + * | | + * wssidA wssidA + * wlp interface { ^ ^ + * ... | | + * ... ... wssid wssid ... + * wlp --- ... | | + * }; neighbors --> neighbA --> neighbB + * ... + * wss + * ... + * eda cache --> neighborA --> neighborB --> neighborC ... + */ + +#ifndef __LINUX__WLP_H_ +#define __LINUX__WLP_H_ + +#include +#include +#include +#include + +/** + * WLP Protocol ID + * WLP Draft 0.99 [6.2] + * + * The MUX header for all WLP frames + */ +#define WLP_PROTOCOL_ID 0x0100 + +/** + * WLP Version + * WLP version placed in the association frames (WLP 0.99 [6.6]) + */ +#define WLP_VERSION 0x10 + +/** + * Bytes needed to print UUID as string + */ +#define WLP_WSS_UUID_STRSIZE 48 + +/** + * Bytes needed to print nonce as string + */ +#define WLP_WSS_NONCE_STRSIZE 48 + + +/** + * Size used for WLP name size + * + * The WSS name is set to 65 bytes, 1 byte larger than the maximum + * allowed by the WLP spec. This is to have a null terminated string + * for display to the user. A maximum of 64 bytes will still be used + * when placing the WSS name field in association frames. + */ +#define WLP_WSS_NAME_SIZE 65 + +/** + * Number of bytes added by WLP to data frame + * + * A data frame transmitted from a host will be placed in a Standard or + * Abbreviated WLP frame. These have an extra 4 bytes of header (struct + * wlp_frame_std_abbrv_hdr). + * When the stack sends this data frame for transmission it needs to ensure + * there is enough headroom for this header. + */ +#define WLP_DATA_HLEN 4 + +/** + * State of device regarding WLP Service Set + * + * WLP_WSS_STATE_NONE: the host does not participate in any WSS + * WLP_WSS_STATE_PART_ENROLLED: used as part of the enrollment sequence + * ("Partial Enroll"). This state is used to + * indicate the first part of enrollment that is + * unsecure. If the WSS is unsecure then the + * state will promptly go to WLP_WSS_STATE_ENROLLED, + * if the WSS is not secure then the enrollment + * procedure is a few more steps before we are + * enrolled. + * WLP_WSS_STATE_ENROLLED: the host is enrolled in a WSS + * WLP_WSS_STATE_ACTIVE: WSS is activated + * WLP_WSS_STATE_CONNECTED: host is connected to neighbor in WSS + * + */ +enum wlp_wss_state { + WLP_WSS_STATE_NONE = 0, + WLP_WSS_STATE_PART_ENROLLED, + WLP_WSS_STATE_ENROLLED, + WLP_WSS_STATE_ACTIVE, + WLP_WSS_STATE_CONNECTED, +}; + +/** + * WSS Secure status + * WLP 0.99 Table 6 + * + * Set to one if the WSS is secure, zero if it is not secure + */ +enum wlp_wss_sec_status { + WLP_WSS_UNSECURE = 0, + WLP_WSS_SECURE, +}; + +/** + * WLP frame type + * WLP Draft 0.99 [6.2 Table 1] + */ +enum wlp_frame_type { + WLP_FRAME_STANDARD = 0, + WLP_FRAME_ABBREVIATED, + WLP_FRAME_CONTROL, + WLP_FRAME_ASSOCIATION, +}; + +/** + * WLP Association Message Type + * WLP Draft 0.99 [6.6.1.2 Table 8] + */ +enum wlp_assoc_type { + WLP_ASSOC_D1 = 2, + WLP_ASSOC_D2 = 3, + WLP_ASSOC_M1 = 4, + WLP_ASSOC_M2 = 5, + WLP_ASSOC_M3 = 7, + WLP_ASSOC_M4 = 8, + WLP_ASSOC_M5 = 9, + WLP_ASSOC_M6 = 10, + WLP_ASSOC_M7 = 11, + WLP_ASSOC_M8 = 12, + WLP_ASSOC_F0 = 14, + WLP_ASSOC_E1 = 32, + WLP_ASSOC_E2 = 33, + WLP_ASSOC_C1 = 34, + WLP_ASSOC_C2 = 35, + WLP_ASSOC_C3 = 36, + WLP_ASSOC_C4 = 37, +}; + +/** + * WLP Attribute Type + * WLP Draft 0.99 [6.6.1 Table 6] + */ +enum wlp_attr_type { + WLP_ATTR_AUTH = 0x1005, /* Authenticator */ + WLP_ATTR_DEV_NAME = 0x1011, /* Device Name */ + WLP_ATTR_DEV_PWD_ID = 0x1012, /* Device Password ID */ + WLP_ATTR_E_HASH1 = 0x1014, /* E-Hash1 */ + WLP_ATTR_E_HASH2 = 0x1015, /* E-Hash2 */ + WLP_ATTR_E_SNONCE1 = 0x1016, /* E-SNonce1 */ + WLP_ATTR_E_SNONCE2 = 0x1017, /* E-SNonce2 */ + WLP_ATTR_ENCR_SET = 0x1018, /* Encrypted Settings */ + WLP_ATTR_ENRL_NONCE = 0x101A, /* Enrollee Nonce */ + WLP_ATTR_KEYWRAP_AUTH = 0x101E, /* Key Wrap Authenticator */ + WLP_ATTR_MANUF = 0x1021, /* Manufacturer */ + WLP_ATTR_MSG_TYPE = 0x1022, /* Message Type */ + WLP_ATTR_MODEL_NAME = 0x1023, /* Model Name */ + WLP_ATTR_MODEL_NR = 0x1024, /* Model Number */ + WLP_ATTR_PUB_KEY = 0x1032, /* Public Key */ + WLP_ATTR_REG_NONCE = 0x1039, /* Registrar Nonce */ + WLP_ATTR_R_HASH1 = 0x103D, /* R-Hash1 */ + WLP_ATTR_R_HASH2 = 0x103E, /* R-Hash2 */ + WLP_ATTR_R_SNONCE1 = 0x103F, /* R-SNonce1 */ + WLP_ATTR_R_SNONCE2 = 0x1040, /* R-SNonce2 */ + WLP_ATTR_SERIAL = 0x1042, /* Serial number */ + WLP_ATTR_UUID_E = 0x1047, /* UUID-E */ + WLP_ATTR_UUID_R = 0x1048, /* UUID-R */ + WLP_ATTR_PRI_DEV_TYPE = 0x1054, /* Primary Device Type */ + WLP_ATTR_SEC_DEV_TYPE = 0x1055, /* Secondary Device Type */ + WLP_ATTR_PORT_DEV = 0x1056, /* Portable Device */ + WLP_ATTR_APP_EXT = 0x1058, /* Application Extension */ + WLP_ATTR_WLP_VER = 0x2000, /* WLP Version */ + WLP_ATTR_WSSID = 0x2001, /* WSSID */ + WLP_ATTR_WSS_NAME = 0x2002, /* WSS Name */ + WLP_ATTR_WSS_SEC_STAT = 0x2003, /* WSS Secure Status */ + WLP_ATTR_WSS_BCAST = 0x2004, /* WSS Broadcast Address */ + WLP_ATTR_WSS_M_KEY = 0x2005, /* WSS Master Key */ + WLP_ATTR_ACC_ENRL = 0x2006, /* Accepting Enrollment */ + WLP_ATTR_WSS_INFO = 0x2007, /* WSS Information */ + WLP_ATTR_WSS_SEL_MTHD = 0x2008, /* WSS Selection Method */ + WLP_ATTR_ASSC_MTHD_LIST = 0x2009, /* Association Methods List */ + WLP_ATTR_SEL_ASSC_MTHD = 0x200A, /* Selected Association Method */ + WLP_ATTR_ENRL_HASH_COMM = 0x200B, /* Enrollee Hash Commitment */ + WLP_ATTR_WSS_TAG = 0x200C, /* WSS Tag */ + WLP_ATTR_WSS_VIRT = 0x200D, /* WSS Virtual EUI-48 */ + WLP_ATTR_WLP_ASSC_ERR = 0x200E, /* WLP Association Error */ + WLP_ATTR_VNDR_EXT = 0x200F, /* Vendor Extension */ +}; + +/** + * WLP Category ID of primary/secondary device + * WLP Draft 0.99 [6.6.1.8 Table 12] + */ +enum wlp_dev_category_id { + WLP_DEV_CAT_COMPUTER = 1, + WLP_DEV_CAT_INPUT, + WLP_DEV_CAT_PRINT_SCAN_FAX_COPIER, + WLP_DEV_CAT_CAMERA, + WLP_DEV_CAT_STORAGE, + WLP_DEV_CAT_INFRASTRUCTURE, + WLP_DEV_CAT_DISPLAY, + WLP_DEV_CAT_MULTIM, + WLP_DEV_CAT_GAMING, + WLP_DEV_CAT_TELEPHONE, + WLP_DEV_CAT_OTHER = 65535, +}; + +/** + * WLP WSS selection method + * WLP Draft 0.99 [6.6.1.6 Table 10] + */ +enum wlp_wss_sel_mthd { + WLP_WSS_ENRL_SELECT = 1, /* Enrollee selects */ + WLP_WSS_REG_SELECT, /* Registrar selects */ +}; + +/** + * WLP association error values + * WLP Draft 0.99 [6.6.1.5 Table 9] + */ +enum wlp_assc_error { + WLP_ASSOC_ERROR_NONE, + WLP_ASSOC_ERROR_AUTH, /* Authenticator Failure */ + WLP_ASSOC_ERROR_ROGUE, /* Rogue activity suspected */ + WLP_ASSOC_ERROR_BUSY, /* Device busy */ + WLP_ASSOC_ERROR_LOCK, /* Setup Locked */ + WLP_ASSOC_ERROR_NOT_READY, /* Registrar not ready */ + WLP_ASSOC_ERROR_INV, /* Invalid WSS selection */ + WLP_ASSOC_ERROR_MSG_TIME, /* Message timeout */ + WLP_ASSOC_ERROR_ENR_TIME, /* Enrollment session timeout */ + WLP_ASSOC_ERROR_PW, /* Device password invalid */ + WLP_ASSOC_ERROR_VER, /* Unsupported version */ + WLP_ASSOC_ERROR_INT, /* Internal error */ + WLP_ASSOC_ERROR_UNDEF, /* Undefined error */ + WLP_ASSOC_ERROR_NUM, /* Numeric comparison failure */ + WLP_ASSOC_ERROR_WAIT, /* Waiting for user input */ +}; + +/** + * WLP Parameters + * WLP 0.99 [7.7] + */ +enum wlp_parameters { + WLP_PER_MSG_TIMEOUT = 15, /* Seconds to wait for response to + association message. */ +}; + +/** + * WLP IE + * + * The WLP IE should be included in beacons by all devices. + * + * The driver can set only a few of the fields in this information element, + * most fields are managed by the device self. When the driver needs to set + * a field it will only provide values for the fields of interest, the rest + * will be filled with zeroes. The fields of interest are: + * + * Element ID + * Length + * Capabilities (only to include WSSID Hash list length) + * WSSID Hash List fields + * + * WLP 0.99 [6.7] + * + * Only the fields that will be used are detailed in this structure, rest + * are not detailed or marked as "notused". + */ +struct wlp_ie { + struct uwb_ie_hdr hdr; + __le16 capabilities; + __le16 cycle_param; + __le16 acw_anchor_addr; + u8 wssid_hash_list[]; +} __attribute__((packed)); + +static inline int wlp_ie_hash_length(struct wlp_ie *ie) +{ + return (le16_to_cpu(ie->capabilities) >> 12) & 0xf; +} + +static inline void wlp_ie_set_hash_length(struct wlp_ie *ie, int hash_length) +{ + u16 caps = le16_to_cpu(ie->capabilities); + caps = (caps & ~(0xf << 12)) | (hash_length << 12); + ie->capabilities = cpu_to_le16(caps); +} + +/** + * WLP nonce + * WLP Draft 0.99 [6.6.1 Table 6] + * + * A 128-bit random number often used (E-SNonce1, E-SNonce2, Enrollee + * Nonce, Registrar Nonce, R-SNonce1, R-SNonce2). It is passed to HW so + * it is packed. + */ +struct wlp_nonce { + u8 data[16]; +} __attribute__((packed)); + +/** + * WLP UUID + * WLP Draft 0.99 [6.6.1 Table 6] + * + * Universally Unique Identifier (UUID) encoded as an octet string in the + * order the octets are shown in string representation in RFC4122. A UUID + * is often used (UUID-E, UUID-R, WSSID). It is passed to HW so it is packed. + */ +struct wlp_uuid { + u8 data[16]; +} __attribute__((packed)); + + +/** + * Primary and secondary device type attributes + * WLP Draft 0.99 [6.6.1.8] + */ +struct wlp_dev_type { + enum wlp_dev_category_id category:16; + u8 OUI[3]; + u8 OUIsubdiv; + __le16 subID; +} __attribute__((packed)); + +/** + * WLP frame header + * WLP Draft 0.99 [6.2] + */ +struct wlp_frame_hdr { + __le16 mux_hdr; /* WLP_PROTOCOL_ID */ + enum wlp_frame_type type:8; +} __attribute__((packed)); + +/** + * WLP attribute field header + * WLP Draft 0.99 [6.6.1] + * + * Header of each attribute found in an association frame + */ +struct wlp_attr_hdr { + __le16 type; + __le16 length; +} __attribute__((packed)); + +/** + * Device information commonly used together + * + * Each of these device information elements has a specified range in which it + * should fit (WLP 0.99 [Table 6]). This range provided in the spec does not + * include the termination null '\0' character (when used in the + * association protocol the attribute fields are accompanied + * with a "length" field so the full range from the spec can be used for + * the value). We thus allocate an extra byte to be able to store a string + * of max length with a terminating '\0'. + */ +struct wlp_device_info { + char name[33]; + char model_name[33]; + char manufacturer[65]; + char model_nr[33]; + char serial[33]; + struct wlp_dev_type prim_dev_type; +}; + +/** + * Macros for the WLP attributes + * + * There are quite a few attributes (total is 43). The attribute layout can be + * in one of three categories: one value, an array, an enum forced to 8 bits. + * These macros help with their definitions. + */ +#define wlp_attr(type, name) \ +struct wlp_attr_##name { \ + struct wlp_attr_hdr hdr; \ + type name; \ +} __attribute__((packed)); + +#define wlp_attr_array(type, name) \ +struct wlp_attr_##name { \ + struct wlp_attr_hdr hdr; \ + type name[]; \ +} __attribute__((packed)); + +/** + * WLP association attribute fields + * WLP Draft 0.99 [6.6.1 Table 6] + * + * Attributes appear in same order as the Table in the spec + * FIXME Does not define all attributes yet + */ + +/* Device name: Friendly name of sending device */ +wlp_attr_array(u8, dev_name) + +/* Enrollee Nonce: Random number generated by enrollee for an enrollment + * session */ +wlp_attr(struct wlp_nonce, enonce) + +/* Manufacturer name: Name of manufacturer of the sending device */ +wlp_attr_array(u8, manufacturer) + +/* WLP Message Type */ +wlp_attr(u8, msg_type) + +/* WLP Model name: Model name of sending device */ +wlp_attr_array(u8, model_name) + +/* WLP Model number: Model number of sending device */ +wlp_attr_array(u8, model_nr) + +/* Registrar Nonce: Random number generated by registrar for an enrollment + * session */ +wlp_attr(struct wlp_nonce, rnonce) + +/* Serial number of device */ +wlp_attr_array(u8, serial) + +/* UUID of enrollee */ +wlp_attr(struct wlp_uuid, uuid_e) + +/* UUID of registrar */ +wlp_attr(struct wlp_uuid, uuid_r) + +/* WLP Primary device type */ +wlp_attr(struct wlp_dev_type, prim_dev_type) + +/* WLP Secondary device type */ +wlp_attr(struct wlp_dev_type, sec_dev_type) + +/* WLP protocol version */ +wlp_attr(u8, version) + +/* WLP service set identifier */ +wlp_attr(struct wlp_uuid, wssid) + +/* WLP WSS name */ +wlp_attr_array(u8, wss_name) + +/* WLP WSS Secure Status */ +wlp_attr(u8, wss_sec_status) + +/* WSS Broadcast Address */ +wlp_attr(struct uwb_mac_addr, wss_bcast) + +/* WLP Accepting Enrollment */ +wlp_attr(u8, accept_enrl) + +/** + * WSS information attributes + * WLP Draft 0.99 [6.6.3 Table 15] + */ +struct wlp_wss_info { + struct wlp_attr_wssid wssid; + struct wlp_attr_wss_name name; + struct wlp_attr_accept_enrl accept; + struct wlp_attr_wss_sec_status sec_stat; + struct wlp_attr_wss_bcast bcast; +} __attribute__((packed)); + +/* WLP WSS Information */ +wlp_attr_array(struct wlp_wss_info, wss_info) + +/* WLP WSS Selection method */ +wlp_attr(u8, wss_sel_mthd) + +/* WLP WSS tag */ +wlp_attr(u8, wss_tag) + +/* WSS Virtual Address */ +wlp_attr(struct uwb_mac_addr, wss_virt) + +/* WLP association error */ +wlp_attr(u8, wlp_assc_err) + +/** + * WLP standard and abbreviated frames + * + * WLP Draft 0.99 [6.3] and [6.4] + * + * The difference between the WLP standard frame and the WLP + * abbreviated frame is that the standard frame includes the src + * and dest addresses from the Ethernet header, the abbreviated frame does + * not. + * The src/dest (as well as the type/length and client data) are already + * defined as part of the Ethernet header, we do not do this here. + * From this perspective the standard and abbreviated frames appear the + * same - they will be treated differently though. + * + * The size of this header is also captured in WLP_DATA_HLEN to enable + * interfaces to prepare their headroom. + */ +struct wlp_frame_std_abbrv_hdr { + struct wlp_frame_hdr hdr; + u8 tag; +} __attribute__((packed)); + +/** + * WLP association frames + * + * WLP Draft 0.99 [6.6] + */ +struct wlp_frame_assoc { + struct wlp_frame_hdr hdr; + enum wlp_assoc_type type:8; + struct wlp_attr_version version; + struct wlp_attr_msg_type msg_type; + u8 attr[]; +} __attribute__((packed)); + +/* Ethernet to dev address mapping */ +struct wlp_eda { + spinlock_t lock; + struct list_head cache; /* Eth<->Dev Addr cache */ +}; + +/** + * WSS information temporary storage + * + * This information is only stored temporarily during discovery. It should + * not be stored unless the device is enrolled in the advertised WSS. This + * is done mainly because we follow the letter of the spec in this regard. + * See WLP 0.99 [7.2.3]. + * When the device does become enrolled in a WSS the WSS information will + * be stored as part of the more comprehensive struct wlp_wss. + */ +struct wlp_wss_tmp_info { + char name[WLP_WSS_NAME_SIZE]; + u8 accept_enroll; + u8 sec_status; + struct uwb_mac_addr bcast; +}; + +struct wlp_wssid_e { + struct list_head node; + struct wlp_uuid wssid; + struct wlp_wss_tmp_info *info; +}; + +/** + * A cache entry of WLP neighborhood + * + * @node: head of list is wlp->neighbors + * @wssid: list of wssids of this neighbor, element is wlp_wssid_e + * @info: temporary storage for information learned during discovery. This + * storage is used together with the wssid_e temporary storage + * during discovery. + */ +struct wlp_neighbor_e { + struct list_head node; + struct wlp_uuid uuid; + struct uwb_dev *uwb_dev; + struct list_head wssid; /* Elements are wlp_wssid_e */ + struct wlp_device_info *info; +}; + +struct wlp; +/** + * Information for an association session in progress. + * + * @exp_message: The type of the expected message. Both this message and a + * F0 message (which can be sent in response to any + * association frame) will be accepted as a valid message for + * this session. + * @cb: The function that will be called upon receipt of this + * message. + * @cb_priv: Private data of callback + * @data: Data used in association process (always a sk_buff?) + * @neighbor: Address of neighbor with which association session is in + * progress. + */ +struct wlp_session { + enum wlp_assoc_type exp_message; + void (*cb)(struct wlp *); + void *cb_priv; + void *data; + struct uwb_dev_addr neighbor_addr; +}; + +/** + * WLP Service Set + * + * @mutex: used to protect entire WSS structure. + * + * @name: The WSS name is set to 65 bytes, 1 byte larger than the maximum + * allowed by the WLP spec. This is to have a null terminated string + * for display to the user. A maximum of 64 bytes will still be used + * when placing the WSS name field in association frames. + * + * @accept_enroll: Accepting enrollment: Set to one if registrar is + * accepting enrollment in WSS, or zero otherwise. + * + * Global and local information for each WSS in which we are enrolled. + * WLP 0.99 Section 7.2.1 and Section 7.2.2 + */ +struct wlp_wss { + struct mutex mutex; + struct kobject kobj; + /* Global properties. */ + struct wlp_uuid wssid; + u8 hash; + char name[WLP_WSS_NAME_SIZE]; + struct uwb_mac_addr bcast; + u8 secure_status:1; + u8 master_key[16]; + /* Local properties. */ + u8 tag; + struct uwb_mac_addr virtual_addr; + /* Extra */ + u8 accept_enroll:1; + enum wlp_wss_state state; +}; + +/** + * WLP main structure + * @mutex: protect changes to WLP structure. We only allow changes to the + * uuid, so currently this mutex only protects this field. + */ +struct wlp { + struct mutex mutex; + struct uwb_rc *rc; /* UWB radio controller */ + struct uwb_pal pal; + struct wlp_eda eda; + struct wlp_uuid uuid; + struct wlp_session *session; + struct wlp_wss wss; + struct mutex nbmutex; /* Neighbor mutex protects neighbors list */ + struct list_head neighbors; /* Elements are wlp_neighbor_e */ + struct uwb_notifs_handler uwb_notifs_handler; + struct wlp_device_info *dev_info; + void (*fill_device_info)(struct wlp *wlp, struct wlp_device_info *info); + int (*xmit_frame)(struct wlp *, struct sk_buff *, + struct uwb_dev_addr *); + void (*stop_queue)(struct wlp *); + void (*start_queue)(struct wlp *); +}; + +/* sysfs */ + + +struct wlp_wss_attribute { + struct attribute attr; + ssize_t (*show)(struct wlp_wss *wss, char *buf); + ssize_t (*store)(struct wlp_wss *wss, const char *buf, size_t count); +}; + +#define WSS_ATTR(_name, _mode, _show, _store) \ +static struct wlp_wss_attribute wss_attr_##_name = __ATTR(_name, _mode, \ + _show, _store) + +extern int wlp_setup(struct wlp *, struct uwb_rc *); +extern void wlp_remove(struct wlp *); +extern ssize_t wlp_neighborhood_show(struct wlp *, char *); +extern int wlp_wss_setup(struct net_device *, struct wlp_wss *); +extern void wlp_wss_remove(struct wlp_wss *); +extern ssize_t wlp_wss_activate_show(struct wlp_wss *, char *); +extern ssize_t wlp_wss_activate_store(struct wlp_wss *, const char *, size_t); +extern ssize_t wlp_eda_show(struct wlp *, char *); +extern ssize_t wlp_eda_store(struct wlp *, const char *, size_t); +extern ssize_t wlp_uuid_show(struct wlp *, char *); +extern ssize_t wlp_uuid_store(struct wlp *, const char *, size_t); +extern ssize_t wlp_dev_name_show(struct wlp *, char *); +extern ssize_t wlp_dev_name_store(struct wlp *, const char *, size_t); +extern ssize_t wlp_dev_manufacturer_show(struct wlp *, char *); +extern ssize_t wlp_dev_manufacturer_store(struct wlp *, const char *, size_t); +extern ssize_t wlp_dev_model_name_show(struct wlp *, char *); +extern ssize_t wlp_dev_model_name_store(struct wlp *, const char *, size_t); +extern ssize_t wlp_dev_model_nr_show(struct wlp *, char *); +extern ssize_t wlp_dev_model_nr_store(struct wlp *, const char *, size_t); +extern ssize_t wlp_dev_serial_show(struct wlp *, char *); +extern ssize_t wlp_dev_serial_store(struct wlp *, const char *, size_t); +extern ssize_t wlp_dev_prim_category_show(struct wlp *, char *); +extern ssize_t wlp_dev_prim_category_store(struct wlp *, const char *, + size_t); +extern ssize_t wlp_dev_prim_OUI_show(struct wlp *, char *); +extern ssize_t wlp_dev_prim_OUI_store(struct wlp *, const char *, size_t); +extern ssize_t wlp_dev_prim_OUI_sub_show(struct wlp *, char *); +extern ssize_t wlp_dev_prim_OUI_sub_store(struct wlp *, const char *, + size_t); +extern ssize_t wlp_dev_prim_subcat_show(struct wlp *, char *); +extern ssize_t wlp_dev_prim_subcat_store(struct wlp *, const char *, + size_t); +extern int wlp_receive_frame(struct device *, struct wlp *, struct sk_buff *, + struct uwb_dev_addr *); +extern int wlp_prepare_tx_frame(struct device *, struct wlp *, + struct sk_buff *, struct uwb_dev_addr *); +void wlp_reset_all(struct wlp *wlp); + +/** + * Initialize WSS + */ +static inline +void wlp_wss_init(struct wlp_wss *wss) +{ + mutex_init(&wss->mutex); +} + +static inline +void wlp_init(struct wlp *wlp) +{ + INIT_LIST_HEAD(&wlp->neighbors); + mutex_init(&wlp->mutex); + mutex_init(&wlp->nbmutex); + wlp_wss_init(&wlp->wss); +} + + +#endif /* #ifndef __LINUX__WLP_H_ */ -- cgit v1.2.3 From da389eac31be24556a71dd59ea6539ae4cba5c15 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Wed, 17 Sep 2008 16:34:12 +0100 Subject: uwb: add the umc bus The UMC bus is used for the capabilities exposed by a UWB Multi-interface Controller as described in the WHCI specification. Signed-off-by: David Vrabel --- drivers/uwb/Makefile | 6 ++ drivers/uwb/umc-bus.c | 218 ++++++++++++++++++++++++++++++++++++++++++++++++ drivers/uwb/umc-dev.c | 104 +++++++++++++++++++++++ drivers/uwb/umc-drv.c | 31 +++++++ include/linux/uwb/umc.h | 194 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 553 insertions(+) create mode 100644 drivers/uwb/umc-bus.c create mode 100644 drivers/uwb/umc-dev.c create mode 100644 drivers/uwb/umc-drv.c create mode 100644 include/linux/uwb/umc.h (limited to 'include') diff --git a/drivers/uwb/Makefile b/drivers/uwb/Makefile index 9a67be5ac5c1..41c9fca5f875 100644 --- a/drivers/uwb/Makefile +++ b/drivers/uwb/Makefile @@ -1,4 +1,5 @@ obj-$(CONFIG_UWB) += uwb.o +obj-$(CONFIG_UWB_WHCI) += umc.o uwb-objs := \ address.o \ @@ -18,3 +19,8 @@ uwb-objs := \ scan.o \ uwb-debug.o \ uwbd.o + +umc-objs := \ + umc-bus.o \ + umc-dev.o \ + umc-drv.o diff --git a/drivers/uwb/umc-bus.c b/drivers/uwb/umc-bus.c new file mode 100644 index 000000000000..2d8d62d9f53e --- /dev/null +++ b/drivers/uwb/umc-bus.c @@ -0,0 +1,218 @@ +/* + * Bus for UWB Multi-interface Controller capabilities. + * + * Copyright (C) 2007 Cambridge Silicon Radio Ltd. + * + * This file is released under the GNU GPL v2. + */ +#include +#include +#include +#include +#include + +static int umc_bus_unbind_helper(struct device *dev, void *data) +{ + struct device *parent = data; + + if (dev->parent == parent && dev->driver) + device_release_driver(dev); + return 0; +} + +/** + * umc_controller_reset - reset the whole UMC controller + * @umc: the UMC device for the radio controller. + * + * Drivers will be unbound from all UMC devices belonging to the + * controller and then the radio controller will be rebound. The + * radio controller is expected to do a full hardware reset when it is + * probed. + * + * If this is called while a probe() or remove() is in progress it + * will return -EAGAIN and not perform the reset. + */ +int umc_controller_reset(struct umc_dev *umc) +{ + struct device *parent = umc->dev.parent; + int ret; + + if (down_trylock(&parent->sem)) + return -EAGAIN; + bus_for_each_dev(&umc_bus_type, NULL, parent, umc_bus_unbind_helper); + ret = device_attach(&umc->dev); + if (ret == 1) + ret = 0; + up(&parent->sem); + + return ret; +} +EXPORT_SYMBOL_GPL(umc_controller_reset); + +/** + * umc_match_pci_id - match a UMC driver to a UMC device's parent PCI device. + * @umc_drv: umc driver with match_data pointing to a zero-terminated + * table of pci_device_id's. + * @umc: umc device whose parent is to be matched. + */ +int umc_match_pci_id(struct umc_driver *umc_drv, struct umc_dev *umc) +{ + const struct pci_device_id *id_table = umc_drv->match_data; + struct pci_dev *pci; + + if (umc->dev.parent->bus != &pci_bus_type) + return 0; + + pci = to_pci_dev(umc->dev.parent); + return pci_match_id(id_table, pci) != NULL; +} +EXPORT_SYMBOL_GPL(umc_match_pci_id); + +static int umc_bus_rescan_helper(struct device *dev, void *data) +{ + int ret = 0; + + if (!dev->driver) + ret = device_attach(dev); + + return ret < 0 ? ret : 0; +} + +static void umc_bus_rescan(void) +{ + int err; + + /* + * We can't use bus_rescan_devices() here as it deadlocks when + * it tries to retake the dev->parent semaphore. + */ + err = bus_for_each_dev(&umc_bus_type, NULL, NULL, umc_bus_rescan_helper); + if (err < 0) + printk(KERN_WARNING "%s: rescan of bus failed: %d\n", + KBUILD_MODNAME, err); +} + +static int umc_bus_match(struct device *dev, struct device_driver *drv) +{ + struct umc_dev *umc = to_umc_dev(dev); + struct umc_driver *umc_driver = to_umc_driver(drv); + + if (umc->cap_id == umc_driver->cap_id) { + if (umc_driver->match) + return umc_driver->match(umc_driver, umc); + else + return 1; + } + return 0; +} + +static int umc_device_probe(struct device *dev) +{ + struct umc_dev *umc; + struct umc_driver *umc_driver; + int err; + + umc_driver = to_umc_driver(dev->driver); + umc = to_umc_dev(dev); + + get_device(dev); + err = umc_driver->probe(umc); + if (err) + put_device(dev); + else + umc_bus_rescan(); + + return err; +} + +static int umc_device_remove(struct device *dev) +{ + struct umc_dev *umc; + struct umc_driver *umc_driver; + + umc_driver = to_umc_driver(dev->driver); + umc = to_umc_dev(dev); + + umc_driver->remove(umc); + put_device(dev); + return 0; +} + +static int umc_device_suspend(struct device *dev, pm_message_t state) +{ + struct umc_dev *umc; + struct umc_driver *umc_driver; + int err = 0; + + umc = to_umc_dev(dev); + + if (dev->driver) { + umc_driver = to_umc_driver(dev->driver); + if (umc_driver->suspend) + err = umc_driver->suspend(umc, state); + } + return err; +} + +static int umc_device_resume(struct device *dev) +{ + struct umc_dev *umc; + struct umc_driver *umc_driver; + int err = 0; + + umc = to_umc_dev(dev); + + if (dev->driver) { + umc_driver = to_umc_driver(dev->driver); + if (umc_driver->resume) + err = umc_driver->resume(umc); + } + return err; +} + +static ssize_t capability_id_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct umc_dev *umc = to_umc_dev(dev); + + return sprintf(buf, "0x%02x\n", umc->cap_id); +} + +static ssize_t version_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct umc_dev *umc = to_umc_dev(dev); + + return sprintf(buf, "0x%04x\n", umc->version); +} + +static struct device_attribute umc_dev_attrs[] = { + __ATTR_RO(capability_id), + __ATTR_RO(version), + __ATTR_NULL, +}; + +struct bus_type umc_bus_type = { + .name = "umc", + .match = umc_bus_match, + .probe = umc_device_probe, + .remove = umc_device_remove, + .suspend = umc_device_suspend, + .resume = umc_device_resume, + .dev_attrs = umc_dev_attrs, +}; +EXPORT_SYMBOL_GPL(umc_bus_type); + +static int __init umc_bus_init(void) +{ + return bus_register(&umc_bus_type); +} +module_init(umc_bus_init); + +static void __exit umc_bus_exit(void) +{ + bus_unregister(&umc_bus_type); +} +module_exit(umc_bus_exit); + +MODULE_DESCRIPTION("UWB Multi-interface Controller capability bus"); +MODULE_AUTHOR("Cambridge Silicon Radio Ltd."); +MODULE_LICENSE("GPL"); diff --git a/drivers/uwb/umc-dev.c b/drivers/uwb/umc-dev.c new file mode 100644 index 000000000000..aa44e1c1a102 --- /dev/null +++ b/drivers/uwb/umc-dev.c @@ -0,0 +1,104 @@ +/* + * UWB Multi-interface Controller device management. + * + * Copyright (C) 2007 Cambridge Silicon Radio Ltd. + * + * This file is released under the GNU GPL v2. + */ +#include +#include +#define D_LOCAL 0 +#include + +static void umc_device_release(struct device *dev) +{ + struct umc_dev *umc = to_umc_dev(dev); + + kfree(umc); +} + +/** + * umc_device_create - allocate a child UMC device + * @parent: parent of the new UMC device. + * @n: index of the new device. + * + * The new UMC device will have a bus ID of the parent with '-n' + * appended. + */ +struct umc_dev *umc_device_create(struct device *parent, int n) +{ + struct umc_dev *umc; + + umc = kzalloc(sizeof(struct umc_dev), GFP_KERNEL); + if (umc) { + snprintf(umc->dev.bus_id, sizeof(umc->dev.bus_id), "%s-%d", + parent->bus_id, n); + umc->dev.parent = parent; + umc->dev.bus = &umc_bus_type; + umc->dev.release = umc_device_release; + + umc->dev.dma_mask = parent->dma_mask; + } + return umc; +} +EXPORT_SYMBOL_GPL(umc_device_create); + +/** + * umc_device_register - register a UMC device + * @umc: pointer to the UMC device + * + * The memory resource for the UMC device is acquired and the device + * registered with the system. + */ +int umc_device_register(struct umc_dev *umc) +{ + int err; + + d_fnstart(3, &umc->dev, "(umc_dev %p)\n", umc); + + err = request_resource(umc->resource.parent, &umc->resource); + if (err < 0) { + dev_err(&umc->dev, "can't allocate resource range " + "%016Lx to %016Lx: %d\n", + (unsigned long long)umc->resource.start, + (unsigned long long)umc->resource.end, + err); + goto error_request_resource; + } + + err = device_register(&umc->dev); + if (err < 0) + goto error_device_register; + d_fnend(3, &umc->dev, "(umc_dev %p) = 0\n", umc); + return 0; + +error_device_register: + release_resource(&umc->resource); +error_request_resource: + d_fnend(3, &umc->dev, "(umc_dev %p) = %d\n", umc, err); + return err; +} +EXPORT_SYMBOL_GPL(umc_device_register); + +/** + * umc_device_unregister - unregister a UMC device + * @umc: pointer to the UMC device + * + * First we unregister the device, make sure the driver can do it's + * resource release thing and then we try to release any left over + * resources. We take a ref to the device, to make sure it doesn't + * dissapear under our feet. + */ +void umc_device_unregister(struct umc_dev *umc) +{ + struct device *dev; + if (!umc) + return; + dev = get_device(&umc->dev); + d_fnstart(3, dev, "(umc_dev %p)\n", umc); + device_unregister(&umc->dev); + release_resource(&umc->resource); + d_fnend(3, dev, "(umc_dev %p) = void\n", umc); + put_device(dev); +} +EXPORT_SYMBOL_GPL(umc_device_unregister); diff --git a/drivers/uwb/umc-drv.c b/drivers/uwb/umc-drv.c new file mode 100644 index 000000000000..367b5eb85d60 --- /dev/null +++ b/drivers/uwb/umc-drv.c @@ -0,0 +1,31 @@ +/* + * UWB Multi-interface Controller driver management. + * + * Copyright (C) 2007 Cambridge Silicon Radio Ltd. + * + * This file is released under the GNU GPL v2. + */ +#include +#include + +int __umc_driver_register(struct umc_driver *umc_drv, struct module *module, + const char *mod_name) +{ + umc_drv->driver.name = umc_drv->name; + umc_drv->driver.owner = module; + umc_drv->driver.mod_name = mod_name; + umc_drv->driver.bus = &umc_bus_type; + + return driver_register(&umc_drv->driver); +} +EXPORT_SYMBOL_GPL(__umc_driver_register); + +/** + * umc_driver_register - unregister a UMC capabiltity driver. + * @umc_drv: pointer to the driver. + */ +void umc_driver_unregister(struct umc_driver *umc_drv) +{ + driver_unregister(&umc_drv->driver); +} +EXPORT_SYMBOL_GPL(umc_driver_unregister); diff --git a/include/linux/uwb/umc.h b/include/linux/uwb/umc.h new file mode 100644 index 000000000000..36a39e34f8d7 --- /dev/null +++ b/include/linux/uwb/umc.h @@ -0,0 +1,194 @@ +/* + * UWB Multi-interface Controller support. + * + * Copyright (C) 2007 Cambridge Silicon Radio Ltd. + * + * This file is released under the GPLv2 + * + * UMC (UWB Multi-interface Controller) capabilities (e.g., radio + * controller, host controller) are presented as devices on the "umc" + * bus. + * + * The radio controller is not strictly a UMC capability but it's + * useful to present it as such. + * + * References: + * + * [WHCI] Wireless Host Controller Interface Specification for + * Certified Wireless Universal Serial Bus, revision 0.95. + * + * How this works is kind of convoluted but simple. The whci.ko driver + * loads when WHCI devices are detected. These WHCI devices expose + * many devices in the same PCI function (they couldn't have reused + * functions, no), so for each PCI function that exposes these many + * devices, whci ceates a umc_dev [whci_probe() -> whci_add_cap()] + * with umc_device_create() and adds it to the bus with + * umc_device_register(). + * + * umc_device_register() calls device_register() which will push the + * bus management code to load your UMC driver's somehting_probe() + * that you have registered for that capability code. + * + * Now when the WHCI device is removed, whci_remove() will go over + * each umc_dev assigned to each of the PCI function's capabilities + * and through whci_del_cap() call umc_device_unregister() each + * created umc_dev. Of course, if you are bound to the device, your + * driver's something_remove() will be called. + */ + +#ifndef _LINUX_UWB_UMC_H_ +#define _LINUX_UWB_UMC_H_ + +#include +#include + +/* + * UMC capability IDs. + * + * 0x00 is reserved so use it for the radio controller device. + * + * [WHCI] table 2-8 + */ +#define UMC_CAP_ID_WHCI_RC 0x00 /* radio controller */ +#define UMC_CAP_ID_WHCI_WUSB_HC 0x01 /* WUSB host controller */ + +/** + * struct umc_dev - UMC capability device + * + * @version: version of the specification this capability conforms to. + * @cap_id: capability ID. + * @bar: PCI Bar (64 bit) where the resource lies + * @resource: register space resource. + * @irq: interrupt line. + */ +struct umc_dev { + u16 version; + u8 cap_id; + u8 bar; + struct resource resource; + unsigned irq; + struct device dev; +}; + +#define to_umc_dev(d) container_of(d, struct umc_dev, dev) + +/** + * struct umc_driver - UMC capability driver + * @cap_id: supported capability ID. + * @match: driver specific capability matching function. + * @match_data: driver specific data for match() (e.g., a + * table of pci_device_id's if umc_match_pci_id() is used). + */ +struct umc_driver { + char *name; + u8 cap_id; + int (*match)(struct umc_driver *, struct umc_dev *); + const void *match_data; + + int (*probe)(struct umc_dev *); + void (*remove)(struct umc_dev *); + int (*suspend)(struct umc_dev *, pm_message_t state); + int (*resume)(struct umc_dev *); + + struct device_driver driver; +}; + +#define to_umc_driver(d) container_of(d, struct umc_driver, driver) + +extern struct bus_type umc_bus_type; + +struct umc_dev *umc_device_create(struct device *parent, int n); +int __must_check umc_device_register(struct umc_dev *umc); +void umc_device_unregister(struct umc_dev *umc); + +int __must_check __umc_driver_register(struct umc_driver *umc_drv, + struct module *mod, + const char *mod_name); + +/** + * umc_driver_register - register a UMC capabiltity driver. + * @umc_drv: pointer to the driver. + */ +static inline int __must_check umc_driver_register(struct umc_driver *umc_drv) +{ + return __umc_driver_register(umc_drv, THIS_MODULE, KBUILD_MODNAME); +} +void umc_driver_unregister(struct umc_driver *umc_drv); + +/* + * Utility function you can use to match (umc_driver->match) against a + * null-terminated array of 'struct pci_device_id' in + * umc_driver->match_data. + */ +int umc_match_pci_id(struct umc_driver *umc_drv, struct umc_dev *umc); + +/** + * umc_parent_pci_dev - return the UMC's parent PCI device or NULL if none + * @umc_dev: UMC device whose parent PCI device we are looking for + * + * DIRTY!!! DON'T RELY ON THIS + * + * FIXME: This is as dirty as it gets, but we need some way to check + * the correct type of umc_dev->parent (so that for example, we can + * cast to pci_dev). Casting to pci_dev is necesary because at some + * point we need to request resources from the device. Mapping is + * easily over come (ioremap and stuff are bus agnostic), but hooking + * up to some error handlers (such as pci error handlers) might need + * this. + * + * THIS might (probably will) be removed in the future, so don't count + * on it. + */ +static inline struct pci_dev *umc_parent_pci_dev(struct umc_dev *umc_dev) +{ + struct pci_dev *pci_dev = NULL; + if (umc_dev->dev.parent->bus == &pci_bus_type) + pci_dev = to_pci_dev(umc_dev->dev.parent); + return pci_dev; +} + +/** + * umc_dev_get() - reference a UMC device. + * @umc_dev: Pointer to UMC device. + * + * NOTE: we are assuming in this whole scheme that the parent device + * is referenced at _probe() time and unreferenced at _remove() + * time by the parent's subsystem. + */ +static inline struct umc_dev *umc_dev_get(struct umc_dev *umc_dev) +{ + get_device(&umc_dev->dev); + return umc_dev; +} + +/** + * umc_dev_put() - unreference a UMC device. + * @umc_dev: Pointer to UMC device. + */ +static inline void umc_dev_put(struct umc_dev *umc_dev) +{ + put_device(&umc_dev->dev); +} + +/** + * umc_set_drvdata - set UMC device's driver data. + * @umc_dev: Pointer to UMC device. + * @data: Data to set. + */ +static inline void umc_set_drvdata(struct umc_dev *umc_dev, void *data) +{ + dev_set_drvdata(&umc_dev->dev, data); +} + +/** + * umc_get_drvdata - recover UMC device's driver data. + * @umc_dev: Pointer to UMC device. + */ +static inline void *umc_get_drvdata(struct umc_dev *umc_dev) +{ + return dev_get_drvdata(&umc_dev->dev); +} + +int umc_controller_reset(struct umc_dev *umc); + +#endif /* #ifndef _LINUX_UWB_UMC_H_ */ -- cgit v1.2.3 From 8f1b678ab900c2bda1620dfb6e1f1f02604fc3a2 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Wed, 17 Sep 2008 16:34:13 +0100 Subject: uwb: add the driver to enumerate WHCI capabilities This enumerates the capabilties of a WHCI device, adding a umc device for each one. Signed-off-by: David Vrabel --- drivers/uwb/Makefile | 2 +- drivers/uwb/whci.c | 269 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/uwb/whci.h | 117 +++++++++++++++++++++ 3 files changed, 387 insertions(+), 1 deletion(-) create mode 100644 drivers/uwb/whci.c create mode 100644 include/linux/uwb/whci.h (limited to 'include') diff --git a/drivers/uwb/Makefile b/drivers/uwb/Makefile index 41c9fca5f875..b054471af28d 100644 --- a/drivers/uwb/Makefile +++ b/drivers/uwb/Makefile @@ -1,5 +1,5 @@ obj-$(CONFIG_UWB) += uwb.o -obj-$(CONFIG_UWB_WHCI) += umc.o +obj-$(CONFIG_UWB_WHCI) += umc.o whci.o uwb-objs := \ address.o \ diff --git a/drivers/uwb/whci.c b/drivers/uwb/whci.c new file mode 100644 index 000000000000..3df2388f908f --- /dev/null +++ b/drivers/uwb/whci.c @@ -0,0 +1,269 @@ +/* + * WHCI UWB Multi-interface Controller enumerator. + * + * Copyright (C) 2007 Cambridge Silicon Radio Ltd. + * + * This file is released under the GNU GPL v2. + */ +#include +#include +#include +#include +#include +#include + +struct whci_card { + struct pci_dev *pci; + void __iomem *uwbbase; + u8 n_caps; + struct umc_dev *devs[0]; +}; + + +/* Fix faulty HW :( */ +static +u64 whci_capdata_quirks(struct whci_card *card, u64 capdata) +{ + u64 capdata_orig = capdata; + struct pci_dev *pci_dev = card->pci; + if (pci_dev->vendor == PCI_VENDOR_ID_INTEL + && (pci_dev->device == 0x0c3b || pci_dev->device == 0004) + && pci_dev->class == 0x0d1010) { + switch (UWBCAPDATA_TO_CAP_ID(capdata)) { + /* WLP capability has 0x100 bytes of aperture */ + case 0x80: + capdata |= 0x40 << 8; break; + /* WUSB capability has 0x80 bytes of aperture + * and ID is 1 */ + case 0x02: + capdata &= ~0xffff; + capdata |= 0x2001; + break; + } + } + if (capdata_orig != capdata) + dev_warn(&pci_dev->dev, + "PCI v%04x d%04x c%06x#%02x: " + "corrected capdata from %016Lx to %016Lx\n", + pci_dev->vendor, pci_dev->device, pci_dev->class, + (unsigned)UWBCAPDATA_TO_CAP_ID(capdata), + (unsigned long long)capdata_orig, + (unsigned long long)capdata); + return capdata; +} + + +/** + * whci_wait_for - wait for a WHCI register to be set + * + * Polls (for at most @max_ms ms) until '*@reg & @mask == @result'. + */ +int whci_wait_for(struct device *dev, u32 __iomem *reg, u32 mask, u32 result, + unsigned long max_ms, const char *tag) +{ + unsigned t = 0; + u32 val; + for (;;) { + val = le_readl(reg); + if ((val & mask) == result) + break; + msleep(10); + if (t >= max_ms) { + dev_err(dev, "timed out waiting for %s ", tag); + return -ETIMEDOUT; + } + t += 10; + } + return 0; +} +EXPORT_SYMBOL_GPL(whci_wait_for); + + +/* + * NOTE: the capinfo and capdata registers are slightly different + * (size and cap-id fields). So for cap #0, we need to fill + * in. Size comes from the size of the register block + * (statically calculated); cap_id comes from nowhere, we use + * zero, that is reserved, for the radio controller, because + * none was defined at the spec level. + */ +static int whci_add_cap(struct whci_card *card, int n) +{ + struct umc_dev *umc; + u64 capdata; + int bar, err; + + umc = umc_device_create(&card->pci->dev, n); + if (umc == NULL) + return -ENOMEM; + + capdata = le_readq(card->uwbbase + UWBCAPDATA(n)); + + bar = UWBCAPDATA_TO_BAR(capdata) << 1; + + capdata = whci_capdata_quirks(card, capdata); + /* Capability 0 is the radio controller. It's size is 32 + * bytes (WHCI0.95[2.3, T2-9]). */ + umc->version = UWBCAPDATA_TO_VERSION(capdata); + umc->cap_id = n == 0 ? 0 : UWBCAPDATA_TO_CAP_ID(capdata); + umc->bar = bar; + umc->resource.start = pci_resource_start(card->pci, bar) + + UWBCAPDATA_TO_OFFSET(capdata); + umc->resource.end = umc->resource.start + + (n == 0 ? 0x20 : UWBCAPDATA_TO_SIZE(capdata)) - 1; + umc->resource.name = umc->dev.bus_id; + umc->resource.flags = card->pci->resource[bar].flags; + umc->resource.parent = &card->pci->resource[bar]; + umc->irq = card->pci->irq; + + err = umc_device_register(umc); + if (err < 0) + goto error; + card->devs[n] = umc; + return 0; + +error: + kfree(umc); + return err; +} + +static void whci_del_cap(struct whci_card *card, int n) +{ + struct umc_dev *umc = card->devs[n]; + + if (umc != NULL) + umc_device_unregister(umc); +} + +static int whci_n_caps(struct pci_dev *pci) +{ + void __iomem *uwbbase; + u64 capinfo; + + uwbbase = pci_iomap(pci, 0, 8); + if (!uwbbase) + return -ENOMEM; + capinfo = le_readq(uwbbase + UWBCAPINFO); + pci_iounmap(pci, uwbbase); + + return UWBCAPINFO_TO_N_CAPS(capinfo); +} + +static int whci_probe(struct pci_dev *pci, const struct pci_device_id *id) +{ + struct whci_card *card; + int err, n_caps, n; + + err = pci_enable_device(pci); + if (err < 0) + goto error; + pci_enable_msi(pci); + pci_set_master(pci); + err = -ENXIO; + if (!pci_set_dma_mask(pci, DMA_64BIT_MASK)) + pci_set_consistent_dma_mask(pci, DMA_64BIT_MASK); + else if (!pci_set_dma_mask(pci, DMA_32BIT_MASK)) + pci_set_consistent_dma_mask(pci, DMA_32BIT_MASK); + else + goto error_dma; + + err = n_caps = whci_n_caps(pci); + if (n_caps < 0) + goto error_ncaps; + + err = -ENOMEM; + card = kzalloc(sizeof(struct whci_card) + + sizeof(struct whci_dev *) * (n_caps + 1), + GFP_KERNEL); + if (card == NULL) + goto error_kzalloc; + card->pci = pci; + card->n_caps = n_caps; + + err = -EBUSY; + if (!request_mem_region(pci_resource_start(pci, 0), + UWBCAPDATA_SIZE(card->n_caps), + "whci (capability data)")) + goto error_request_memregion; + err = -ENOMEM; + card->uwbbase = pci_iomap(pci, 0, UWBCAPDATA_SIZE(card->n_caps)); + if (!card->uwbbase) + goto error_iomap; + + /* Add each capability. */ + for (n = 0; n <= card->n_caps; n++) { + err = whci_add_cap(card, n); + if (err < 0 && n == 0) { + dev_err(&pci->dev, "cannot bind UWB radio controller:" + " %d\n", err); + goto error_bind; + } + if (err < 0) + dev_warn(&pci->dev, "warning: cannot bind capability " + "#%u: %d\n", n, err); + } + pci_set_drvdata(pci, card); + return 0; + +error_bind: + pci_iounmap(pci, card->uwbbase); +error_iomap: + release_mem_region(pci_resource_start(pci, 0), UWBCAPDATA_SIZE(card->n_caps)); +error_request_memregion: + kfree(card); +error_kzalloc: +error_ncaps: +error_dma: + pci_disable_msi(pci); + pci_disable_device(pci); +error: + return err; +} + +static void whci_remove(struct pci_dev *pci) +{ + struct whci_card *card = pci_get_drvdata(pci); + int n; + + pci_set_drvdata(pci, NULL); + /* Unregister each capability in reverse (so the master device + * is unregistered last). */ + for (n = card->n_caps; n >= 0 ; n--) + whci_del_cap(card, n); + pci_iounmap(pci, card->uwbbase); + release_mem_region(pci_resource_start(pci, 0), UWBCAPDATA_SIZE(card->n_caps)); + kfree(card); + pci_disable_msi(pci); + pci_disable_device(pci); +} + +static struct pci_device_id whci_id_table[] = { + { PCI_DEVICE_CLASS(PCI_CLASS_WIRELESS_WHCI, ~0) }, + { 0 }, +}; +MODULE_DEVICE_TABLE(pci, whci_id_table); + + +static struct pci_driver whci_driver = { + .name = "whci", + .id_table = whci_id_table, + .probe = whci_probe, + .remove = whci_remove, +}; + +static int __init whci_init(void) +{ + return pci_register_driver(&whci_driver); +} + +static void __exit whci_exit(void) +{ + pci_unregister_driver(&whci_driver); +} + +module_init(whci_init); +module_exit(whci_exit); + +MODULE_DESCRIPTION("WHCI UWB Multi-interface Controller enumerator"); +MODULE_AUTHOR("Cambridge Silicon Radio Ltd."); +MODULE_LICENSE("GPL"); diff --git a/include/linux/uwb/whci.h b/include/linux/uwb/whci.h new file mode 100644 index 000000000000..915ec23042d4 --- /dev/null +++ b/include/linux/uwb/whci.h @@ -0,0 +1,117 @@ +/* + * Wireless Host Controller Interface for Ultra-Wide-Band and Wireless USB + * + * Copyright (C) 2005-2006 Intel Corporation + * Inaky Perez-Gonzalez + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + * + * + * + * References: + * [WHCI] Wireless Host Controller Interface Specification for + * Certified Wireless Universal Serial Bus, revision 0.95. + */ +#ifndef _LINUX_UWB_WHCI_H_ +#define _LINUX_UWB_WHCI_H_ + +#include + +/* + * UWB interface capability registers (offsets from UWBBASE) + * + * [WHCI] section 2.2 + */ +#define UWBCAPINFO 0x00 /* == UWBCAPDATA(0) */ +# define UWBCAPINFO_TO_N_CAPS(c) (((c) >> 0) & 0xFull) +#define UWBCAPDATA(n) (8*(n)) +# define UWBCAPDATA_TO_VERSION(c) (((c) >> 32) & 0xFFFFull) +# define UWBCAPDATA_TO_OFFSET(c) (((c) >> 18) & 0x3FFFull) +# define UWBCAPDATA_TO_BAR(c) (((c) >> 16) & 0x3ull) +# define UWBCAPDATA_TO_SIZE(c) ((((c) >> 8) & 0xFFull) * sizeof(u32)) +# define UWBCAPDATA_TO_CAP_ID(c) (((c) >> 0) & 0xFFull) + +/* Size of the WHCI capability data (including the RC capability) for + a device with n capabilities. */ +#define UWBCAPDATA_SIZE(n) (8 + 8*(n)) + + +/* + * URC registers (offsets from URCBASE) + * + * [WHCI] section 2.3 + */ +#define URCCMD 0x00 +# define URCCMD_RESET (1 << 31) /* UMC Hardware reset */ +# define URCCMD_RS (1 << 30) /* Run/Stop */ +# define URCCMD_EARV (1 << 29) /* Event Address Register Valid */ +# define URCCMD_ACTIVE (1 << 15) /* Command is active */ +# define URCCMD_IWR (1 << 14) /* Interrupt When Ready */ +# define URCCMD_SIZE_MASK 0x00000fff /* Command size mask */ +#define URCSTS 0x04 +# define URCSTS_EPS (1 << 17) /* Event Processing Status */ +# define URCSTS_HALTED (1 << 16) /* RC halted */ +# define URCSTS_HSE (1 << 10) /* Host System Error...fried */ +# define URCSTS_ER (1 << 9) /* Event Ready */ +# define URCSTS_RCI (1 << 8) /* Ready for Command Interrupt */ +# define URCSTS_INT_MASK 0x00000700 /* URC interrupt sources */ +# define URCSTS_ISI 0x000000ff /* Interrupt Source Identification */ +#define URCINTR 0x08 +# define URCINTR_EN_ALL 0x000007ff /* Enable all interrupt sources */ +#define URCCMDADDR 0x10 +#define URCEVTADDR 0x18 +# define URCEVTADDR_OFFSET_MASK 0xfff /* Event pointer offset mask */ + + +/** Write 32 bit @value to little endian register at @addr */ +static inline +void le_writel(u32 value, void __iomem *addr) +{ + iowrite32(value, addr); +} + + +/** Read from 32 bit little endian register at @addr */ +static inline +u32 le_readl(void __iomem *addr) +{ + return ioread32(addr); +} + + +/** Write 64 bit @value to little endian register at @addr */ +static inline +void le_writeq(u64 value, void __iomem *addr) +{ + iowrite32(value, addr); + iowrite32(value >> 32, addr + 4); +} + + +/** Read from 64 bit little endian register at @addr */ +static inline +u64 le_readq(void __iomem *addr) +{ + u64 value; + value = ioread32(addr); + value |= (u64)ioread32(addr + 4) << 32; + return value; +} + +extern int whci_wait_for(struct device *dev, u32 __iomem *reg, + u32 mask, u32 result, + unsigned long max_ms, const char *tag); + +#endif /* #ifndef _LINUX_UWB_WHCI_H_ */ -- cgit v1.2.3 From c7f736484f8ecde4dc1bc8459179c4d65f2ccbe4 Mon Sep 17 00:00:00 2001 From: Inaky Perez-Gonzalez Date: Wed, 17 Sep 2008 16:34:22 +0100 Subject: wusb: add the Wireless USB include files. Common header files derived from the WUSB 1.0 specification. Signed-off-by: David Vrabel --- include/linux/usb/wusb-wa.h | 271 +++++++++++++++++++++++++++++++ include/linux/usb/wusb.h | 376 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 647 insertions(+) create mode 100644 include/linux/usb/wusb-wa.h create mode 100644 include/linux/usb/wusb.h (limited to 'include') diff --git a/include/linux/usb/wusb-wa.h b/include/linux/usb/wusb-wa.h new file mode 100644 index 000000000000..a102561e7026 --- /dev/null +++ b/include/linux/usb/wusb-wa.h @@ -0,0 +1,271 @@ +/* + * Wireless USB Wire Adapter constants and structures. + * + * Copyright (C) 2005-2006 Intel Corporation. + * Inaky Perez-Gonzalez + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + * + * + * FIXME: docs + * FIXME: organize properly, group logically + * + * All the event structures are defined in uwb/spec.h, as they are + * common to the WHCI and WUSB radio control interfaces. + * + * References: + * [WUSB] Wireless Universal Serial Bus Specification, revision 1.0, ch8 + */ +#ifndef __LINUX_USB_WUSB_WA_H +#define __LINUX_USB_WUSB_WA_H + +/** + * Radio Command Request for the Radio Control Interface + * + * Radio Control Interface command and event codes are the same as + * WHCI, and listed in include/linux/uwb.h:UWB_RC_{CMD,EVT}_* + */ +enum { + WA_EXEC_RC_CMD = 40, /* Radio Control command Request */ +}; + +/* Wireless Adapter Requests ([WUSB] table 8-51) */ +enum { + WUSB_REQ_ADD_MMC_IE = 20, + WUSB_REQ_REMOVE_MMC_IE = 21, + WUSB_REQ_SET_NUM_DNTS = 22, + WUSB_REQ_SET_CLUSTER_ID = 23, + WUSB_REQ_SET_DEV_INFO = 24, + WUSB_REQ_GET_TIME = 25, + WUSB_REQ_SET_STREAM_IDX = 26, + WUSB_REQ_SET_WUSB_MAS = 27, +}; + + +/* Wireless Adapter WUSB Channel Time types ([WUSB] table 8-52) */ +enum { + WUSB_TIME_ADJ = 0, + WUSB_TIME_BPST = 1, + WUSB_TIME_WUSB = 2, +}; + +enum { + WA_ENABLE = 0x01, + WA_RESET = 0x02, + RPIPE_PAUSE = 0x1, +}; + +/* Responses from Get Status request ([WUSB] section 8.3.1.6) */ +enum { + WA_STATUS_ENABLED = 0x01, + WA_STATUS_RESETTING = 0x02 +}; + +enum rpipe_crs { + RPIPE_CRS_CTL = 0x01, + RPIPE_CRS_ISO = 0x02, + RPIPE_CRS_BULK = 0x04, + RPIPE_CRS_INTR = 0x08 +}; + +/** + * RPipe descriptor ([WUSB] section 8.5.2.11) + * + * FIXME: explain rpipes + */ +struct usb_rpipe_descriptor { + u8 bLength; + u8 bDescriptorType; + __le16 wRPipeIndex; + __le16 wRequests; + __le16 wBlocks; /* rw if 0 */ + __le16 wMaxPacketSize; /* rw? */ + u8 bHSHubAddress; /* reserved: 0 */ + u8 bHSHubPort; /* ??? FIXME ??? */ + u8 bSpeed; /* rw: xfer rate 'enum uwb_phy_rate' */ + u8 bDeviceAddress; /* rw: Target device address */ + u8 bEndpointAddress; /* rw: Target EP address */ + u8 bDataSequence; /* ro: Current Data sequence */ + __le32 dwCurrentWindow; /* ro */ + u8 bMaxDataSequence; /* ro?: max supported seq */ + u8 bInterval; /* rw: */ + u8 bOverTheAirInterval; /* rw: */ + u8 bmAttribute; /* ro? */ + u8 bmCharacteristics; /* ro? enum rpipe_attr, supported xsactions */ + u8 bmRetryOptions; /* rw? */ + __le16 wNumTransactionErrors; /* rw */ +} __attribute__ ((packed)); + +/** + * Wire Adapter Notification types ([WUSB] sections 8.4.5 & 8.5.4) + * + * These are the notifications coming on the notification endpoint of + * an HWA and a DWA. + */ +enum wa_notif_type { + DWA_NOTIF_RWAKE = 0x91, + DWA_NOTIF_PORTSTATUS = 0x92, + WA_NOTIF_TRANSFER = 0x93, + HWA_NOTIF_BPST_ADJ = 0x94, + HWA_NOTIF_DN = 0x95, +}; + +/** + * Wire Adapter notification header + * + * Notifications coming from a wire adapter use a common header + * defined in [WUSB] sections 8.4.5 & 8.5.4. + */ +struct wa_notif_hdr { + u8 bLength; + u8 bNotifyType; /* enum wa_notif_type */ +} __attribute__((packed)); + +/** + * HWA DN Received notification [(WUSB] section 8.5.4.2) + * + * The DNData is specified in WUSB1.0[7.6]. For each device + * notification we received, we just need to dispatch it. + * + * @dndata: this is really an array of notifications, but all start + * with the same header. + */ +struct hwa_notif_dn { + struct wa_notif_hdr hdr; + u8 bSourceDeviceAddr; /* from errata 2005/07 */ + u8 bmAttributes; + struct wusb_dn_hdr dndata[]; +} __attribute__((packed)); + +/* [WUSB] section 8.3.3 */ +enum wa_xfer_type { + WA_XFER_TYPE_CTL = 0x80, + WA_XFER_TYPE_BI = 0x81, /* bulk/interrupt */ + WA_XFER_TYPE_ISO = 0x82, + WA_XFER_RESULT = 0x83, + WA_XFER_ABORT = 0x84, +}; + +/* [WUSB] section 8.3.3 */ +struct wa_xfer_hdr { + u8 bLength; /* 0x18 */ + u8 bRequestType; /* 0x80 WA_REQUEST_TYPE_CTL */ + __le16 wRPipe; /* RPipe index */ + __le32 dwTransferID; /* Host-assigned ID */ + __le32 dwTransferLength; /* Length of data to xfer */ + u8 bTransferSegment; +} __attribute__((packed)); + +struct wa_xfer_ctl { + struct wa_xfer_hdr hdr; + u8 bmAttribute; + __le16 wReserved; + struct usb_ctrlrequest baSetupData; +} __attribute__((packed)); + +struct wa_xfer_bi { + struct wa_xfer_hdr hdr; + u8 bReserved; + __le16 wReserved; +} __attribute__((packed)); + +struct wa_xfer_hwaiso { + struct wa_xfer_hdr hdr; + u8 bReserved; + __le16 wPresentationTime; + __le32 dwNumOfPackets; + /* FIXME: u8 pktdata[]? */ +} __attribute__((packed)); + +/* [WUSB] section 8.3.3.5 */ +struct wa_xfer_abort { + u8 bLength; + u8 bRequestType; + __le16 wRPipe; /* RPipe index */ + __le32 dwTransferID; /* Host-assigned ID */ +} __attribute__((packed)); + +/** + * WA Transfer Complete notification ([WUSB] section 8.3.3.3) + * + */ +struct wa_notif_xfer { + struct wa_notif_hdr hdr; + u8 bEndpoint; + u8 Reserved; +} __attribute__((packed)); + +/** Transfer result basic codes [WUSB] table 8-15 */ +enum { + WA_XFER_STATUS_SUCCESS, + WA_XFER_STATUS_HALTED, + WA_XFER_STATUS_DATA_BUFFER_ERROR, + WA_XFER_STATUS_BABBLE, + WA_XFER_RESERVED, + WA_XFER_STATUS_NOT_FOUND, + WA_XFER_STATUS_INSUFFICIENT_RESOURCE, + WA_XFER_STATUS_TRANSACTION_ERROR, + WA_XFER_STATUS_ABORTED, + WA_XFER_STATUS_RPIPE_NOT_READY, + WA_XFER_INVALID_FORMAT, + WA_XFER_UNEXPECTED_SEGMENT_NUMBER, + WA_XFER_STATUS_RPIPE_TYPE_MISMATCH, +}; + +/** [WUSB] section 8.3.3.4 */ +struct wa_xfer_result { + struct wa_notif_hdr hdr; + __le32 dwTransferID; + __le32 dwTransferLength; + u8 bTransferSegment; + u8 bTransferStatus; + __le32 dwNumOfPackets; +} __attribute__((packed)); + +/** + * Wire Adapter Class Descriptor ([WUSB] section 8.5.2.7). + * + * NOTE: u16 fields are read Little Endian from the hardware. + * + * @bNumPorts is the original max number of devices that the host can + * connect; we might chop this so the stack can handle + * it. In case you need to access it, use wusbhc->ports_max + * if it is a Wireless USB WA. + */ +struct usb_wa_descriptor { + u8 bLength; + u8 bDescriptorType; + u16 bcdWAVersion; + u8 bNumPorts; /* don't use!! */ + u8 bmAttributes; /* Reserved == 0 */ + u16 wNumRPipes; + u16 wRPipeMaxBlock; + u8 bRPipeBlockSize; + u8 bPwrOn2PwrGood; + u8 bNumMMCIEs; + u8 DeviceRemovable; /* FIXME: in DWA this is up to 16 bytes */ +} __attribute__((packed)); + +/** + * HWA Device Information Buffer (WUSB1.0[T8.54]) + */ +struct hwa_dev_info { + u8 bmDeviceAvailability[32]; /* FIXME: ignored for now */ + u8 bDeviceAddress; + __le16 wPHYRates; + u8 bmDeviceAttribute; +} __attribute__((packed)); + +#endif /* #ifndef __LINUX_USB_WUSB_WA_H */ diff --git a/include/linux/usb/wusb.h b/include/linux/usb/wusb.h new file mode 100644 index 000000000000..5f401b644ed5 --- /dev/null +++ b/include/linux/usb/wusb.h @@ -0,0 +1,376 @@ +/* + * Wireless USB Standard Definitions + * Event Size Tables + * + * Copyright (C) 2005-2006 Intel Corporation + * Inaky Perez-Gonzalez + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + * + * + * FIXME: docs + * FIXME: organize properly, group logically + * + * All the event structures are defined in uwb/spec.h, as they are + * common to the WHCI and WUSB radio control interfaces. + */ + +#ifndef __WUSB_H__ +#define __WUSB_H__ + +#include +#include +#include +#include +#include + +/** + * WUSB Information Element header + * + * I don't know why, they decided to make it different to the MBOA MAC + * IE Header; beats me. + */ +struct wuie_hdr { + u8 bLength; + u8 bIEIdentifier; +} __attribute__((packed)); + +enum { + WUIE_ID_WCTA = 0x80, + WUIE_ID_CONNECTACK, + WUIE_ID_HOST_INFO, + WUIE_ID_CHANGE_ANNOUNCE, + WUIE_ID_DEVICE_DISCONNECT, + WUIE_ID_HOST_DISCONNECT, + WUIE_ID_KEEP_ALIVE = 0x89, + WUIE_ID_ISOCH_DISCARD, + WUIE_ID_RESET_DEVICE, +}; + +/** + * Maximum number of array elements in a WUSB IE. + * + * WUSB1.0[7.5 before table 7-38] says that in WUSB IEs that + * are "arrays" have to limited to 4 elements. So we define it + * like that to ease up and submit only the neeed size. + */ +#define WUIE_ELT_MAX 4 + +/** + * Wrapper for the data that defines a CHID, a CDID or a CK + * + * WUSB defines that CHIDs, CDIDs and CKs are a 16 byte string of + * data. In order to avoid confusion and enforce types, we wrap it. + * + * Make it packed, as we use it in some hw defintions. + */ +struct wusb_ckhdid { + u8 data[16]; +} __attribute__((packed)); + +const static +struct wusb_ckhdid wusb_ckhdid_zero = { .data = { 0 } }; + +#define WUSB_CKHDID_STRSIZE (3 * sizeof(struct wusb_ckhdid) + 1) + +/** + * WUSB IE: Host Information (WUSB1.0[7.5.2]) + * + * Used to provide information about the host to the Wireless USB + * devices in range (CHID can be used as an ASCII string). + */ +struct wuie_host_info { + struct wuie_hdr hdr; + __le16 attributes; + struct wusb_ckhdid CHID; +} __attribute__((packed)); + +/** + * WUSB IE: Connect Ack (WUSB1.0[7.5.1]) + * + * Used to acknowledge device connect requests. See note for + * WUIE_ELT_MAX. + */ +struct wuie_connect_ack { + struct wuie_hdr hdr; + struct { + struct wusb_ckhdid CDID; + u8 bDeviceAddress; /* 0 means unused */ + u8 bReserved; + } blk[WUIE_ELT_MAX]; +} __attribute__((packed)); + +/** + * WUSB IE Host Information Element, Connect Availability + * + * WUSB1.0[7.5.2], bmAttributes description + */ +enum { + WUIE_HI_CAP_RECONNECT = 0, + WUIE_HI_CAP_LIMITED, + WUIE_HI_CAP_RESERVED, + WUIE_HI_CAP_ALL, +}; + +/** + * WUSB IE: Channel Stop (WUSB1.0[7.5.8]) + * + * Tells devices the host is going to stop sending MMCs and will dissapear. + */ +struct wuie_channel_stop { + struct wuie_hdr hdr; + u8 attributes; + u8 timestamp[3]; +} __attribute__((packed)); + +/** + * WUSB IE: Keepalive (WUSB1.0[7.5.9]) + * + * Ask device(s) to send keepalives. + */ +struct wuie_keep_alive { + struct wuie_hdr hdr; + u8 bDeviceAddress[WUIE_ELT_MAX]; +} __attribute__((packed)); + +/** + * WUSB IE: Reset device (WUSB1.0[7.5.11]) + * + * Tell device to reset; in all truth, we can fit 4 CDIDs, but we only + * use it for one at the time... + * + * In any case, this request is a wee bit silly: why don't they target + * by address?? + */ +struct wuie_reset { + struct wuie_hdr hdr; + struct wusb_ckhdid CDID; +} __attribute__((packed)); + +/** + * WUSB IE: Disconnect device (WUSB1.0[7.5.11]) + * + * Tell device to disconnect; we can fit 4 addresses, but we only use + * it for one at the time... + */ +struct wuie_disconnect { + struct wuie_hdr hdr; + u8 bDeviceAddress; + u8 padding; +} __attribute__((packed)); + +/** + * WUSB IE: Host disconnect ([WUSB] section 7.5.5) + * + * Tells all connected devices to disconnect. + */ +struct wuie_host_disconnect { + struct wuie_hdr hdr; +} __attribute__((packed)); + +/** + * WUSB Device Notification header (WUSB1.0[7.6]) + */ +struct wusb_dn_hdr { + u8 bType; + u8 notifdata[]; +} __attribute__((packed)); + +/** Device Notification codes (WUSB1.0[Table 7-54]) */ +enum WUSB_DN { + WUSB_DN_CONNECT = 0x01, + WUSB_DN_DISCONNECT = 0x02, + WUSB_DN_EPRDY = 0x03, + WUSB_DN_MASAVAILCHANGED = 0x04, + WUSB_DN_RWAKE = 0x05, + WUSB_DN_SLEEP = 0x06, + WUSB_DN_ALIVE = 0x07, +}; + +/** WUSB Device Notification Connect */ +struct wusb_dn_connect { + struct wusb_dn_hdr hdr; + __le16 attributes; + struct wusb_ckhdid CDID; +} __attribute__((packed)); + +static inline int wusb_dn_connect_prev_dev_addr(const struct wusb_dn_connect *dn) +{ + return le16_to_cpu(dn->attributes) & 0xff; +} + +static inline int wusb_dn_connect_new_connection(const struct wusb_dn_connect *dn) +{ + return (le16_to_cpu(dn->attributes) >> 8) & 0x1; +} + +static inline int wusb_dn_connect_beacon_behavior(const struct wusb_dn_connect *dn) +{ + return (le16_to_cpu(dn->attributes) >> 9) & 0x03; +} + +/** Device is alive (aka: pong) (WUSB1.0[7.6.7]) */ +struct wusb_dn_alive { + struct wusb_dn_hdr hdr; +} __attribute__((packed)); + +/** Device is disconnecting (WUSB1.0[7.6.2]) */ +struct wusb_dn_disconnect { + struct wusb_dn_hdr hdr; +} __attribute__((packed)); + +/* General constants */ +enum { + WUSB_TRUST_TIMEOUT_MS = 4000, /* [WUSB] section 4.15.1 */ +}; + +static inline size_t ckhdid_printf(char *pr_ckhdid, size_t size, + const struct wusb_ckhdid *ckhdid) +{ + return scnprintf(pr_ckhdid, size, + "%02hx %02hx %02hx %02hx %02hx %02hx %02hx %02hx " + "%02hx %02hx %02hx %02hx %02hx %02hx %02hx %02hx", + ckhdid->data[0], ckhdid->data[1], + ckhdid->data[2], ckhdid->data[3], + ckhdid->data[4], ckhdid->data[5], + ckhdid->data[6], ckhdid->data[7], + ckhdid->data[8], ckhdid->data[9], + ckhdid->data[10], ckhdid->data[11], + ckhdid->data[12], ckhdid->data[13], + ckhdid->data[14], ckhdid->data[15]); +} + +/* + * WUSB Crypto stuff (WUSB1.0[6]) + */ + +extern const char *wusb_et_name(u8); + +/** + * WUSB key index WUSB1.0[7.3.2.4], for usage when setting keys for + * the host or the device. + */ +static inline u8 wusb_key_index(int index, int type, int originator) +{ + return (originator << 6) | (type << 4) | index; +} + +#define WUSB_KEY_INDEX_TYPE_PTK 0 /* for HWA only */ +#define WUSB_KEY_INDEX_TYPE_ASSOC 1 +#define WUSB_KEY_INDEX_TYPE_GTK 2 +#define WUSB_KEY_INDEX_ORIGINATOR_HOST 0 +#define WUSB_KEY_INDEX_ORIGINATOR_DEVICE 1 + +/* A CCM Nonce, defined in WUSB1.0[6.4.1] */ +struct aes_ccm_nonce { + u8 sfn[6]; /* Little Endian */ + u8 tkid[3]; /* LE */ + struct uwb_dev_addr dest_addr; + struct uwb_dev_addr src_addr; +} __attribute__((packed)); + +/* A CCM operation label, defined on WUSB1.0[6.5.x] */ +struct aes_ccm_label { + u8 data[14]; +} __attribute__((packed)); + +/* + * Input to the key derivation sequence defined in + * WUSB1.0[6.5.1]. Rest of the data is in the CCM Nonce passed to the + * PRF function. + */ +struct wusb_keydvt_in { + u8 hnonce[16]; + u8 dnonce[16]; +} __attribute__((packed)); + +/* + * Output from the key derivation sequence defined in + * WUSB1.0[6.5.1]. + */ +struct wusb_keydvt_out { + u8 kck[16]; + u8 ptk[16]; +} __attribute__((packed)); + +/* Pseudo Random Function WUSB1.0[6.5] */ +extern int wusb_crypto_init(void); +extern void wusb_crypto_exit(void); +extern ssize_t wusb_prf(void *out, size_t out_size, + const u8 key[16], const struct aes_ccm_nonce *_n, + const struct aes_ccm_label *a, + const void *b, size_t blen, size_t len); + +static inline int wusb_prf_64(void *out, size_t out_size, const u8 key[16], + const struct aes_ccm_nonce *n, + const struct aes_ccm_label *a, + const void *b, size_t blen) +{ + return wusb_prf(out, out_size, key, n, a, b, blen, 64); +} + +static inline int wusb_prf_128(void *out, size_t out_size, const u8 key[16], + const struct aes_ccm_nonce *n, + const struct aes_ccm_label *a, + const void *b, size_t blen) +{ + return wusb_prf(out, out_size, key, n, a, b, blen, 128); +} + +static inline int wusb_prf_256(void *out, size_t out_size, const u8 key[16], + const struct aes_ccm_nonce *n, + const struct aes_ccm_label *a, + const void *b, size_t blen) +{ + return wusb_prf(out, out_size, key, n, a, b, blen, 256); +} + +/* Key derivation WUSB1.0[6.5.1] */ +static inline int wusb_key_derive(struct wusb_keydvt_out *keydvt_out, + const u8 key[16], + const struct aes_ccm_nonce *n, + const struct wusb_keydvt_in *keydvt_in) +{ + const struct aes_ccm_label a = { .data = "Pair-wise keys" }; + return wusb_prf_256(keydvt_out, sizeof(*keydvt_out), key, n, &a, + keydvt_in, sizeof(*keydvt_in)); +} + +/* + * Out-of-band MIC Generation WUSB1.0[6.5.2] + * + * Compute the MIC over @key, @n and @hs and place it in @mic_out. + * + * @mic_out: Where to place the 8 byte MIC tag + * @key: KCK from the derivation process + * @n: CCM nonce, n->sfn == 0, TKID as established in the + * process. + * @hs: Handshake struct for phase 2 of the 4-way. + * hs->bStatus and hs->bReserved are zero. + * hs->bMessageNumber is 2 (WUSB1.0[7.3.2.5.2] + * hs->dest_addr is the device's USB address padded with 0 + * hs->src_addr is the hosts's UWB device address + * hs->mic is ignored (as we compute that value). + */ +static inline int wusb_oob_mic(u8 mic_out[8], const u8 key[16], + const struct aes_ccm_nonce *n, + const struct usb_handshake *hs) +{ + const struct aes_ccm_label a = { .data = "out-of-bandMIC" }; + return wusb_prf_64(mic_out, 8, key, n, &a, + hs, sizeof(*hs) - sizeof(hs->MIC)); +} + +#endif /* #ifndef __WUSB_H__ */ -- cgit v1.2.3 From b60066c141997ac2e4ef08459b75638ae86ae781 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Wed, 17 Sep 2008 16:34:40 +0100 Subject: uwb: add symlinks in sysfs between radio controllers and PALs Add a facility for PALs to have symlinks to their radio controller (and vice-versa) and make WUSB host controllers use this. Signed-off-by: David Vrabel --- drivers/usb/wusbcore/pal.c | 3 +++ drivers/usb/wusbcore/wusbhc.c | 16 +++++++++------- drivers/uwb/pal.c | 20 ++++++++++++++++++++ include/linux/uwb.h | 6 +++++- 4 files changed, 37 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/usb/wusbcore/pal.c b/drivers/usb/wusbcore/pal.c index cc126b444734..7cc51e9905cf 100644 --- a/drivers/usb/wusbcore/pal.c +++ b/drivers/usb/wusbcore/pal.c @@ -26,6 +26,9 @@ int wusbhc_pal_register(struct wusbhc *wusbhc) { uwb_pal_init(&wusbhc->pal); + wusbhc->pal.name = "wusbhc"; + wusbhc->pal.device = wusbhc->usb_hcd.self.controller; + return uwb_pal_register(wusbhc->uwb_rc, &wusbhc->pal); } diff --git a/drivers/usb/wusbcore/wusbhc.c b/drivers/usb/wusbcore/wusbhc.c index 1149b1e59c86..07c63a31c799 100644 --- a/drivers/usb/wusbcore/wusbhc.c +++ b/drivers/usb/wusbcore/wusbhc.c @@ -192,13 +192,8 @@ int wusbhc_create(struct wusbhc *wusbhc) result = wusbhc_sec_create(wusbhc); if (result < 0) goto error_sec_create; - result = wusbhc_pal_register(wusbhc); - if (result < 0) - goto error_pal_register; return 0; -error_pal_register: - wusbhc_sec_destroy(wusbhc); error_sec_create: wusbhc_rh_destroy(wusbhc); error_rh_create: @@ -235,7 +230,14 @@ int wusbhc_b_create(struct wusbhc *wusbhc) dev_err(dev, "Cannot register WUSBHC attributes: %d\n", result); goto error_create_attr_group; } - /* Yep, I plan to add stuff here... */ + + result = wusbhc_pal_register(wusbhc); + if (result < 0) + goto error_pal_register; + return 0; + +error_pal_register: + sysfs_remove_group(wusbhc_kobj(wusbhc), &wusbhc_attr_group); error_create_attr_group: return result; } @@ -243,13 +245,13 @@ EXPORT_SYMBOL_GPL(wusbhc_b_create); void wusbhc_b_destroy(struct wusbhc *wusbhc) { + wusbhc_pal_unregister(wusbhc); sysfs_remove_group(wusbhc_kobj(wusbhc), &wusbhc_attr_group); } EXPORT_SYMBOL_GPL(wusbhc_b_destroy); void wusbhc_destroy(struct wusbhc *wusbhc) { - wusbhc_pal_unregister(wusbhc); wusbhc_sec_destroy(wusbhc); wusbhc_rh_destroy(wusbhc); wusbhc_devconnect_destroy(wusbhc); diff --git a/drivers/uwb/pal.c b/drivers/uwb/pal.c index 5508993a820e..1afb38eacb9a 100644 --- a/drivers/uwb/pal.c +++ b/drivers/uwb/pal.c @@ -39,6 +39,21 @@ EXPORT_SYMBOL_GPL(uwb_pal_init); */ int uwb_pal_register(struct uwb_rc *rc, struct uwb_pal *pal) { + int ret; + + if (pal->device) { + ret = sysfs_create_link(&pal->device->kobj, + &rc->uwb_dev.dev.kobj, "uwb_rc"); + if (ret < 0) + return ret; + ret = sysfs_create_link(&rc->uwb_dev.dev.kobj, + &pal->device->kobj, pal->name); + if (ret < 0) { + sysfs_remove_link(&pal->device->kobj, "uwb_rc"); + return ret; + } + } + spin_lock(&rc->pal_lock); list_add(&pal->node, &rc->pals); spin_unlock(&rc->pal_lock); @@ -57,6 +72,11 @@ void uwb_pal_unregister(struct uwb_rc *rc, struct uwb_pal *pal) spin_lock(&rc->pal_lock); list_del(&pal->node); spin_unlock(&rc->pal_lock); + + if (pal->device) { + sysfs_remove_link(&rc->uwb_dev.dev.kobj, pal->name); + sysfs_remove_link(&pal->device->kobj, "uwb_rc"); + } } EXPORT_SYMBOL_GPL(uwb_pal_unregister); diff --git a/include/linux/uwb.h b/include/linux/uwb.h index 0cd35937e120..f9ccbd9a2ced 100644 --- a/include/linux/uwb.h +++ b/include/linux/uwb.h @@ -361,6 +361,9 @@ struct uwb_rc { /** * struct uwb_pal - a UWB PAL + * @name: descriptive name for this PAL (wushc, wlp, etc.). + * @device: a device for the PAL. Used to link the PAL and the radio + * controller in sysfs. * @new_rsv: called when a peer requests a reservation (may be NULL if * the PAL cannot accept reservation requests). * @@ -379,7 +382,8 @@ struct uwb_rc { */ struct uwb_pal { struct list_head node; - + const char *name; + struct device *device; void (*new_rsv)(struct uwb_rsv *rsv); }; -- cgit v1.2.3 From d7cfb60c5cf904ecf1e0ae23ec178175b86f0d4a Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Fri, 19 Sep 2008 13:13:44 +0100 Subject: hrtimer: remove hrtimer_clock_base::get_softirq_time() Peter Zijlstra noticed this 8 months ago and I just noticed it again. hrtimer_clock_base::get_softirq_time() is currently unused in the entire tree. In fact, looking at the logs, it appears as if it was never used. Remove it. Signed-off-by: Mark McLoughlin Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 2 -- kernel/hrtimer.c | 4 +--- 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 6d93dce61cbb..1b079bd29c35 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -145,7 +145,6 @@ struct hrtimer_sleeper { * @first: pointer to the timer node which expires first * @resolution: the resolution of the clock, in nanoseconds * @get_time: function to retrieve the current time of the clock - * @get_softirq_time: function to retrieve the current time from the softirq * @softirq_time: the time when running the hrtimer queue in the softirq * @offset: offset of this clock to the monotonic base * @reprogram: function to reprogram the timer event @@ -157,7 +156,6 @@ struct hrtimer_clock_base { struct rb_node *first; ktime_t resolution; ktime_t (*get_time)(void); - ktime_t (*get_softirq_time)(void); ktime_t softirq_time; #ifdef CONFIG_HIGH_RES_TIMERS ktime_t offset; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 03ea1378c43b..4d761d50c529 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1401,9 +1401,7 @@ void hrtimer_run_queues(void) if (!base->first) continue; - if (base->get_softirq_time) - base->softirq_time = base->get_softirq_time(); - else if (gettime) { + if (gettime) { hrtimer_get_softirq_time(cpu_base); gettime = 0; } -- cgit v1.2.3 From b91c4996df56fcd201f85c392a1de7bc3f6641f5 Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Fri, 19 Sep 2008 13:13:48 +0100 Subject: hrtimer: remove hrtimer_clock_base::reprogram() hrtimer_clock_base::reprogram() also appears to never have been used, so remove it. Signed-off-by: Mark McLoughlin Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 1b079bd29c35..68b0196d8696 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -147,7 +147,6 @@ struct hrtimer_sleeper { * @get_time: function to retrieve the current time of the clock * @softirq_time: the time when running the hrtimer queue in the softirq * @offset: offset of this clock to the monotonic base - * @reprogram: function to reprogram the timer event */ struct hrtimer_clock_base { struct hrtimer_cpu_base *cpu_base; @@ -159,9 +158,6 @@ struct hrtimer_clock_base { ktime_t softirq_time; #ifdef CONFIG_HIGH_RES_TIMERS ktime_t offset; - int (*reprogram)(struct hrtimer *t, - struct hrtimer_clock_base *b, - ktime_t n); #endif }; -- cgit v1.2.3 From a0ad05c75aa362c91f4d9cd91ff375a739574dd8 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Mon, 1 Sep 2008 14:27:02 +0200 Subject: Introduce FW_BUG, FW_WARN and FW_INFO to consistenly tell users about BIOS bugs The idea is to add this to printk after the severity: printk(KERN_ERR FW_BUG "This is not our fault, BIOS developer: fix it by simply add ...\n"); If a Firmware issue should be hidden, because it is work-arounded, but you still want to see something popping up e.g. for info only: printk(KERN_INFO FW_INFO "This is done stupid, we can handle it, but it should better be avoided in future\n"); or on the Linuxfirmwarekit to tell vendors that they did something stupid or wrong without bothering the user: printk(KERN_INFO FW_BUG "This is done stupid, we can handle it, but it should better be avoided in future\n"); Some use cases: - If a user sees a [Firmware Bug] message in the kernel he should first update the BIOS before wasting time with debugging and submiting on old firmware code to mailing lists. - The linuxfirmwarekit (http://www.linuxfirmwarekit.org) tries to detect firmware bugs. It currently is doing that in userspace which results in: - Huge test scripts that could be a one liner in the kernel - A lot of BIOS bugs are already absorbed by the kernel What do we need such a stupid linuxfirmwarekit for? - Vendors: Can test their BIOSes for Linux compatibility. There will be the time when vendors realize that the test utils on Linux are more strict and using them increases the qualitity and stability of their products. - Vendors: Can easily fix up their BIOSes and be more Linux compatible by: dmesg |grep "Firmware Bug" and send the result to their BIOS developer colleagues who should know what the messages are about and how to fix them, without the need of studying kernel code. - Distributions: can do a first automated HW/BIOS checks. This can then be done without the need of asking kernel developers who need to dig down the code and explain the details. Certification can/will just be rejected until dmesg |grep "Firmware Bug" is empty. - Thus this can be used as an instrument to enforce cleaner BIOS code. Currently every stupid Windows ACPI bug is re-implemented in Linux which is a rather unfortunate situation. We already have the power to avoid this in e.g. memory or cpu hot-plug ACPI implementations, because Linux certification is a must for most vendors in the server area. Working towards being able to do that in the laptop area (vendors are starting to look at Linux here also and will use this tool) is the goal. At least provide them a tool to make it as easy for this guys (e.g. not needing to browse kernel code) as possible. - The ordinary Linux user: can go into the next shop, boots the firmwarekit on his most preferred machines. He chooses one without BIOS bugs. Unsupported HW is ok, he likes to try out latest projects which might support them or likes to dig on it on his own, but he hates to workaround broken BIOSes like hell. I double checked with the firmwarekit. There they have: So the mapping generally is (also depending on how likely the BIOS is to blame, this could sometimes be difficult): FW_INFO = INFO FW_WARN = WARN FW_BUG = FAIL For more info about the linuxfirmwarekit and why this is needed can be found here: http://www.linuxfirmwarekit.org While severity matches with the firmwarekit, it might be tricky to hide messages from the user. E.g. we recently found out that on HP BIOSes negative temperatures are returned, which seem to indicate that the thermal zone is invalid. We can work around that gracefully by ignoring the thermal zone and we do not want to bother the ordinary user with a frightening message: Firmware Bug: thermal management absolutely broken but want to hide it from the user. But in the linuxfirmwarekit this should be shown as a real show stopper (the temperatures could really be wrong, broken thermal management is one of the worst things that can happen and the BIOS guys of the machine must implement this properly). It is intended to do that (hide it from the user with KERN_INFO msg, but still print it as a BIOS bug) by: printk(KERN_INFO FW_BUG "Negativ temperature values detected. Try to workarounded, BIOS must get fixed\n"); Hope that works out..., no idea how to better hide it as printk is the only way to easily provide this functionality. Signed-off-by: Thomas Renninger Signed-off-by: Andi Kleen Signed-off-by: Len Brown --- include/linux/kernel.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2651f805ba6d..0b19848e380e 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -190,6 +190,30 @@ extern int kernel_text_address(unsigned long addr); struct pid; extern struct pid *session_of_pgrp(struct pid *pgrp); +/* + * FW_BUG + * Add this to a message where you are sure the firmware is buggy or behaves + * really stupid or out of spec. Be aware that the responsible BIOS developer + * should be able to fix this issue or at least get a concrete idea of the + * problem by reading your message without the need of looking at the kernel + * code. + * + * Use it for definite and high priority BIOS bugs. + * + * FW_WARN + * Use it for not that clear (e.g. could the kernel messed up things already?) + * and medium priority BIOS bugs. + * + * FW_INFO + * Use this one if you want to tell the user or vendor about something + * suspicious, but generally harmless related to the firmware. + * + * Use it for information or very low priority BIOS bugs. + */ +#define FW_BUG "[Firmware Bug]: " +#define FW_WARN "[Firmware Warn]: " +#define FW_INFO "[Firmware Info]: " + #ifdef CONFIG_PRINTK asmlinkage int vprintk(const char *fmt, va_list args) __attribute__ ((format (printf, 1, 0))); -- cgit v1.2.3 From bb34d92f643086d546b49cef680f6f305ed84414 Mon Sep 17 00:00:00 2001 From: Frank Mayhar Date: Fri, 12 Sep 2008 09:54:39 -0700 Subject: timers: fix itimer/many thread hang, v2 This is the second resubmission of the posix timer rework patch, posted a few days ago. This includes the changes from the previous resubmittion, which addressed Oleg Nesterov's comments, removing the RCU stuff from the patch and un-inlining the thread_group_cputime() function for SMP. In addition, per Ingo Molnar it simplifies the UP code, consolidating much of it with the SMP version and depending on lower-level SMP/UP handling to take care of the differences. It also cleans up some UP compile errors, moves the scheduler stats-related macros into kernel/sched_stats.h, cleans up a merge error in kernel/fork.c and has a few other minor fixes and cleanups as suggested by Oleg and Ingo. Thanks for the review, guys. Signed-off-by: Frank Mayhar Cc: Roland McGrath Cc: Alexey Dobriyan Cc: Andrew Morton Signed-off-by: Ingo Molnar --- include/linux/kernel_stat.h | 1 + include/linux/sched.h | 183 ++------------------------------------------ kernel/fork.c | 5 +- kernel/posix-cpu-timers.c | 153 ++++++++++++++++-------------------- kernel/sched.c | 47 ++---------- kernel/sched_stats.h | 136 ++++++++++++++++++++++++++++++++ 6 files changed, 214 insertions(+), 311 deletions(-) (limited to 'include') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index cf9f40a91c9c..cac3750cd65e 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -52,6 +52,7 @@ static inline int kstat_irqs(int irq) return sum; } +extern unsigned long long task_delta_exec(struct task_struct *); extern void account_user_time(struct task_struct *, cputime_t); extern void account_user_time_scaled(struct task_struct *, cputime_t); extern void account_system_time(struct task_struct *, int, cputime_t); diff --git a/include/linux/sched.h b/include/linux/sched.h index 7ce8d4e53565..b982fb48c8f0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -454,15 +454,9 @@ struct task_cputime { * This structure contains the version of task_cputime, above, that is * used for thread group CPU clock calculations. */ -#ifdef CONFIG_SMP struct thread_group_cputime { struct task_cputime *totals; }; -#else -struct thread_group_cputime { - struct task_cputime totals; -}; -#endif /* * NOTE! "signal_struct" does not have it's own @@ -2124,193 +2118,26 @@ static inline int spin_needbreak(spinlock_t *lock) /* * Thread group CPU time accounting. */ -#ifdef CONFIG_SMP -extern int thread_group_cputime_alloc_smp(struct task_struct *); -extern void thread_group_cputime_smp(struct task_struct *, struct task_cputime *); +extern int thread_group_cputime_alloc(struct task_struct *); +extern void thread_group_cputime(struct task_struct *, struct task_cputime *); static inline void thread_group_cputime_init(struct signal_struct *sig) { sig->cputime.totals = NULL; } -static inline int thread_group_cputime_clone_thread(struct task_struct *curr, - struct task_struct *new) +static inline int thread_group_cputime_clone_thread(struct task_struct *curr) { if (curr->signal->cputime.totals) return 0; - return thread_group_cputime_alloc_smp(curr); + return thread_group_cputime_alloc(curr); } -static inline void thread_group_cputime_free(struct signal_struct *sig) -{ - free_percpu(sig->cputime.totals); -} - -/** - * thread_group_cputime - Sum the thread group time fields across all CPUs. - * - * This is a wrapper for the real routine, thread_group_cputime_smp(). See - * that routine for details. - */ -static inline void thread_group_cputime( - struct task_struct *tsk, - struct task_cputime *times) -{ - thread_group_cputime_smp(tsk, times); -} - -/** - * thread_group_cputime_account_user - Maintain utime for a thread group. - * - * @tgtimes: Pointer to thread_group_cputime structure. - * @cputime: Time value by which to increment the utime field of that - * structure. - * - * If thread group time is being maintained, get the structure for the - * running CPU and update the utime field there. - */ -static inline void thread_group_cputime_account_user( - struct thread_group_cputime *tgtimes, - cputime_t cputime) -{ - if (tgtimes->totals) { - struct task_cputime *times; - - times = per_cpu_ptr(tgtimes->totals, get_cpu()); - times->utime = cputime_add(times->utime, cputime); - put_cpu_no_resched(); - } -} - -/** - * thread_group_cputime_account_system - Maintain stime for a thread group. - * - * @tgtimes: Pointer to thread_group_cputime structure. - * @cputime: Time value by which to increment the stime field of that - * structure. - * - * If thread group time is being maintained, get the structure for the - * running CPU and update the stime field there. - */ -static inline void thread_group_cputime_account_system( - struct thread_group_cputime *tgtimes, - cputime_t cputime) -{ - if (tgtimes->totals) { - struct task_cputime *times; - - times = per_cpu_ptr(tgtimes->totals, get_cpu()); - times->stime = cputime_add(times->stime, cputime); - put_cpu_no_resched(); - } -} - -/** - * thread_group_cputime_account_exec_runtime - Maintain exec runtime for a - * thread group. - * - * @tgtimes: Pointer to thread_group_cputime structure. - * @ns: Time value by which to increment the sum_exec_runtime field - * of that structure. - * - * If thread group time is being maintained, get the structure for the - * running CPU and update the sum_exec_runtime field there. - */ -static inline void thread_group_cputime_account_exec_runtime( - struct thread_group_cputime *tgtimes, - unsigned long long ns) -{ - if (tgtimes->totals) { - struct task_cputime *times; - - times = per_cpu_ptr(tgtimes->totals, get_cpu()); - times->sum_exec_runtime += ns; - put_cpu_no_resched(); - } -} - -#else /* CONFIG_SMP */ - -static inline void thread_group_cputime_init(struct signal_struct *sig) -{ - sig->cputime.totals.utime = cputime_zero; - sig->cputime.totals.stime = cputime_zero; - sig->cputime.totals.sum_exec_runtime = 0; -} - -static inline int thread_group_cputime_alloc(struct task_struct *tsk) -{ - return 0; -} static inline void thread_group_cputime_free(struct signal_struct *sig) { -} - -static inline int thread_group_cputime_clone_thread(struct task_struct *curr, - struct task_struct *tsk) -{ - return 0; -} - -static inline void thread_group_cputime(struct task_struct *tsk, - struct task_cputime *cputime) -{ - *cputime = tsk->signal->cputime.totals; -} - -static inline void thread_group_cputime_account_user( - struct thread_group_cputime *tgtimes, - cputime_t cputime) -{ - tgtimes->totals.utime = cputime_add(tgtimes->totals.utime, cputime); -} - -static inline void thread_group_cputime_account_system( - struct thread_group_cputime *tgtimes, - cputime_t cputime) -{ - tgtimes->totals.stime = cputime_add(tgtimes->totals.stime, cputime); -} - -static inline void thread_group_cputime_account_exec_runtime( - struct thread_group_cputime *tgtimes, - unsigned long long ns) -{ - tgtimes->totals.sum_exec_runtime += ns; -} - -#endif /* CONFIG_SMP */ - -static inline void account_group_user_time(struct task_struct *tsk, - cputime_t cputime) -{ - struct signal_struct *sig; - - sig = tsk->signal; - if (likely(sig)) - thread_group_cputime_account_user(&sig->cputime, cputime); -} - -static inline void account_group_system_time(struct task_struct *tsk, - cputime_t cputime) -{ - struct signal_struct *sig; - - sig = tsk->signal; - if (likely(sig)) - thread_group_cputime_account_system(&sig->cputime, cputime); -} - -static inline void account_group_exec_runtime(struct task_struct *tsk, - unsigned long long ns) -{ - struct signal_struct *sig; - - sig = tsk->signal; - if (likely(sig)) - thread_group_cputime_account_exec_runtime(&sig->cputime, ns); + free_percpu(sig->cputime.totals); } /* diff --git a/kernel/fork.c b/kernel/fork.c index 1181b9aac48e..021ae012cc75 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -791,7 +791,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) int ret; if (clone_flags & CLONE_THREAD) { - ret = thread_group_cputime_clone_thread(current, tsk); + ret = thread_group_cputime_clone_thread(current); if (likely(!ret)) { atomic_inc(¤t->signal->count); atomic_inc(¤t->signal->live); @@ -834,9 +834,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; task_io_accounting_init(&sig->ioac); - INIT_LIST_HEAD(&sig->cpu_timers[0]); - INIT_LIST_HEAD(&sig->cpu_timers[1]); - INIT_LIST_HEAD(&sig->cpu_timers[2]); taskstats_tgid_init(sig); task_lock(current->group_leader); diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 9a7ea049fcdc..153dcb2639c3 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -7,50 +7,46 @@ #include #include #include +#include -#ifdef CONFIG_SMP /* - * Allocate the thread_group_cputime structure appropriately for SMP kernels - * and fill in the current values of the fields. Called from copy_signal() - * via thread_group_cputime_clone_thread() when adding a second or subsequent + * Allocate the thread_group_cputime structure appropriately and fill in the + * current values of the fields. Called from copy_signal() via + * thread_group_cputime_clone_thread() when adding a second or subsequent * thread to a thread group. Assumes interrupts are enabled when called. */ -int thread_group_cputime_alloc_smp(struct task_struct *tsk) +int thread_group_cputime_alloc(struct task_struct *tsk) { struct signal_struct *sig = tsk->signal; struct task_cputime *cputime; /* * If we have multiple threads and we don't already have a - * per-CPU task_cputime struct, allocate one and fill it in with - * the times accumulated so far. + * per-CPU task_cputime struct (checked in the caller), allocate + * one and fill it in with the times accumulated so far. We may + * race with another thread so recheck after we pick up the sighand + * lock. */ - if (sig->cputime.totals) - return 0; cputime = alloc_percpu(struct task_cputime); if (cputime == NULL) return -ENOMEM; - read_lock(&tasklist_lock); spin_lock_irq(&tsk->sighand->siglock); if (sig->cputime.totals) { spin_unlock_irq(&tsk->sighand->siglock); - read_unlock(&tasklist_lock); free_percpu(cputime); return 0; } sig->cputime.totals = cputime; - cputime = per_cpu_ptr(sig->cputime.totals, get_cpu()); + cputime = per_cpu_ptr(sig->cputime.totals, smp_processor_id()); cputime->utime = tsk->utime; cputime->stime = tsk->stime; cputime->sum_exec_runtime = tsk->se.sum_exec_runtime; - put_cpu_no_resched(); spin_unlock_irq(&tsk->sighand->siglock); - read_unlock(&tasklist_lock); return 0; } /** - * thread_group_cputime_smp - Sum the thread group time fields across all CPUs. + * thread_group_cputime - Sum the thread group time fields across all CPUs. * * @tsk: The task we use to identify the thread group. * @times: task_cputime structure in which we return the summed fields. @@ -58,7 +54,7 @@ int thread_group_cputime_alloc_smp(struct task_struct *tsk) * Walk the list of CPUs to sum the per-CPU time fields in the thread group * time structure. */ -void thread_group_cputime_smp( +void thread_group_cputime( struct task_struct *tsk, struct task_cputime *times) { @@ -83,8 +79,6 @@ void thread_group_cputime_smp( } } -#endif /* CONFIG_SMP */ - /* * Called after updating RLIMIT_CPU to set timer expiration if necessary. */ @@ -300,7 +294,7 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p, cpu->cpu = virt_ticks(p); break; case CPUCLOCK_SCHED: - cpu->sched = task_sched_runtime(p); + cpu->sched = p->se.sum_exec_runtime + task_delta_exec(p); break; } return 0; @@ -309,16 +303,15 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p, /* * Sample a process (thread group) clock for the given group_leader task. * Must be called with tasklist_lock held for reading. - * Must be called with tasklist_lock held for reading, and p->sighand->siglock. */ -static int cpu_clock_sample_group_locked(unsigned int clock_idx, - struct task_struct *p, - union cpu_time_count *cpu) +static int cpu_clock_sample_group(const clockid_t which_clock, + struct task_struct *p, + union cpu_time_count *cpu) { struct task_cputime cputime; thread_group_cputime(p, &cputime); - switch (clock_idx) { + switch (which_clock) { default: return -EINVAL; case CPUCLOCK_PROF: @@ -328,29 +321,12 @@ static int cpu_clock_sample_group_locked(unsigned int clock_idx, cpu->cpu = cputime.utime; break; case CPUCLOCK_SCHED: - cpu->sched = thread_group_sched_runtime(p); + cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p); break; } return 0; } -/* - * Sample a process (thread group) clock for the given group_leader task. - * Must be called with tasklist_lock held for reading. - */ -static int cpu_clock_sample_group(const clockid_t which_clock, - struct task_struct *p, - union cpu_time_count *cpu) -{ - int ret; - unsigned long flags; - spin_lock_irqsave(&p->sighand->siglock, flags); - ret = cpu_clock_sample_group_locked(CPUCLOCK_WHICH(which_clock), p, - cpu); - spin_unlock_irqrestore(&p->sighand->siglock, flags); - return ret; -} - int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp) { @@ -1324,29 +1300,37 @@ static inline int task_cputime_expired(const struct task_cputime *sample, * fastpath_timer_check - POSIX CPU timers fast path. * * @tsk: The task (thread) being checked. - * @sig: The signal pointer for that task. * - * If there are no timers set return false. Otherwise snapshot the task and - * thread group timers, then compare them with the corresponding expiration - # times. Returns true if a timer has expired, else returns false. + * Check the task and thread group timers. If both are zero (there are no + * timers set) return false. Otherwise snapshot the task and thread group + * timers and compare them with the corresponding expiration times. Return + * true if a timer has expired, else return false. */ -static inline int fastpath_timer_check(struct task_struct *tsk, - struct signal_struct *sig) +static inline int fastpath_timer_check(struct task_struct *tsk) { - struct task_cputime task_sample = { - .utime = tsk->utime, - .stime = tsk->stime, - .sum_exec_runtime = tsk->se.sum_exec_runtime - }; - struct task_cputime group_sample; + struct signal_struct *sig = tsk->signal; - if (task_cputime_zero(&tsk->cputime_expires) && - task_cputime_zero(&sig->cputime_expires)) + if (unlikely(!sig)) return 0; - if (task_cputime_expired(&task_sample, &tsk->cputime_expires)) - return 1; - thread_group_cputime(tsk, &group_sample); - return task_cputime_expired(&group_sample, &sig->cputime_expires); + + if (!task_cputime_zero(&tsk->cputime_expires)) { + struct task_cputime task_sample = { + .utime = tsk->utime, + .stime = tsk->stime, + .sum_exec_runtime = tsk->se.sum_exec_runtime + }; + + if (task_cputime_expired(&task_sample, &tsk->cputime_expires)) + return 1; + } + if (!task_cputime_zero(&sig->cputime_expires)) { + struct task_cputime group_sample; + + thread_group_cputime(tsk, &group_sample); + if (task_cputime_expired(&group_sample, &sig->cputime_expires)) + return 1; + } + return 0; } /* @@ -1358,43 +1342,34 @@ void run_posix_cpu_timers(struct task_struct *tsk) { LIST_HEAD(firing); struct k_itimer *timer, *next; - struct signal_struct *sig; - struct sighand_struct *sighand; - unsigned long flags; BUG_ON(!irqs_disabled()); - /* Pick up tsk->signal and make sure it's valid. */ - sig = tsk->signal; /* * The fast path checks that there are no expired thread or thread - * group timers. If that's so, just return. Also check that - * tsk->signal is non-NULL; this probably can't happen but cover the - * possibility anyway. + * group timers. If that's so, just return. */ - if (unlikely(!sig) || !fastpath_timer_check(tsk, sig)) + if (!fastpath_timer_check(tsk)) return; - sighand = lock_task_sighand(tsk, &flags); - if (likely(sighand)) { - /* - * Here we take off tsk->signal->cpu_timers[N] and - * tsk->cpu_timers[N] all the timers that are firing, and - * put them on the firing list. - */ - check_thread_timers(tsk, &firing); - check_process_timers(tsk, &firing); + spin_lock(&tsk->sighand->siglock); + /* + * Here we take off tsk->signal->cpu_timers[N] and + * tsk->cpu_timers[N] all the timers that are firing, and + * put them on the firing list. + */ + check_thread_timers(tsk, &firing); + check_process_timers(tsk, &firing); - /* - * We must release these locks before taking any timer's lock. - * There is a potential race with timer deletion here, as the - * siglock now protects our private firing list. We have set - * the firing flag in each timer, so that a deletion attempt - * that gets the timer lock before we do will give it up and - * spin until we've taken care of that timer below. - */ - } - unlock_task_sighand(tsk, &flags); + /* + * We must release these locks before taking any timer's lock. + * There is a potential race with timer deletion here, as the + * siglock now protects our private firing list. We have set + * the firing flag in each timer, so that a deletion attempt + * that gets the timer lock before we do will give it up and + * spin until we've taken care of that timer below. + */ + spin_unlock(&tsk->sighand->siglock); /* * Now that all the timers on our list have the firing flag, @@ -1433,7 +1408,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, struct list_head *head; BUG_ON(clock_idx == CPUCLOCK_SCHED); - cpu_clock_sample_group_locked(clock_idx, tsk, &now); + cpu_clock_sample_group(clock_idx, tsk, &now); if (oldval) { if (!cputime_eq(*oldval, cputime_zero)) { diff --git a/kernel/sched.c b/kernel/sched.c index c51b5d276665..260c22cc530a 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4039,55 +4039,22 @@ EXPORT_PER_CPU_SYMBOL(kstat); /* * Return any ns on the sched_clock that have not yet been banked in * @p in case that task is currently running. - * - * Called with task_rq_lock() held on @rq. */ -static unsigned long long task_delta_exec(struct task_struct *p, struct rq *rq) +unsigned long long task_delta_exec(struct task_struct *p) { + struct rq *rq; + unsigned long flags; + u64 ns = 0; + + rq = task_rq_lock(p, &flags); if (task_current(rq, p)) { u64 delta_exec; update_rq_clock(rq); delta_exec = rq->clock - p->se.exec_start; if ((s64)delta_exec > 0) - return delta_exec; + ns = delta_exec; } - return 0; -} - -/* - * Return p->sum_exec_runtime plus any more ns on the sched_clock - * that have not yet been banked in case the task is currently running. - */ -unsigned long long task_sched_runtime(struct task_struct *p) -{ - unsigned long flags; - u64 ns; - struct rq *rq; - - rq = task_rq_lock(p, &flags); - ns = p->se.sum_exec_runtime + task_delta_exec(p, rq); - task_rq_unlock(rq, &flags); - - return ns; -} - -/* - * Return sum_exec_runtime for the thread group plus any more ns on the - * sched_clock that have not yet been banked in case the task is currently - * running. - */ -unsigned long long thread_group_sched_runtime(struct task_struct *p) -{ - unsigned long flags; - u64 ns; - struct rq *rq; - struct task_cputime totals; - - rq = task_rq_lock(p, &flags); - thread_group_cputime(p, &totals); - ns = totals.sum_exec_runtime + task_delta_exec(p, rq); - task_rq_unlock(rq, &flags); return ns; } diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h index 8385d43987e2..d6903bd0c7a8 100644 --- a/kernel/sched_stats.h +++ b/kernel/sched_stats.h @@ -270,3 +270,139 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next) #define sched_info_switch(t, next) do { } while (0) #endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */ +/* + * The following are functions that support scheduler-internal time accounting. + * These functions are generally called at the timer tick. None of this depends + * on CONFIG_SCHEDSTATS. + */ + +#ifdef CONFIG_SMP + +/** + * thread_group_cputime_account_user - Maintain utime for a thread group. + * + * @tgtimes: Pointer to thread_group_cputime structure. + * @cputime: Time value by which to increment the utime field of that + * structure. + * + * If thread group time is being maintained, get the structure for the + * running CPU and update the utime field there. + */ +static inline void thread_group_cputime_account_user( + struct thread_group_cputime *tgtimes, + cputime_t cputime) +{ + if (tgtimes->totals) { + struct task_cputime *times; + + times = per_cpu_ptr(tgtimes->totals, get_cpu()); + times->utime = cputime_add(times->utime, cputime); + put_cpu_no_resched(); + } +} + +/** + * thread_group_cputime_account_system - Maintain stime for a thread group. + * + * @tgtimes: Pointer to thread_group_cputime structure. + * @cputime: Time value by which to increment the stime field of that + * structure. + * + * If thread group time is being maintained, get the structure for the + * running CPU and update the stime field there. + */ +static inline void thread_group_cputime_account_system( + struct thread_group_cputime *tgtimes, + cputime_t cputime) +{ + if (tgtimes->totals) { + struct task_cputime *times; + + times = per_cpu_ptr(tgtimes->totals, get_cpu()); + times->stime = cputime_add(times->stime, cputime); + put_cpu_no_resched(); + } +} + +/** + * thread_group_cputime_account_exec_runtime - Maintain exec runtime for a + * thread group. + * + * @tgtimes: Pointer to thread_group_cputime structure. + * @ns: Time value by which to increment the sum_exec_runtime field + * of that structure. + * + * If thread group time is being maintained, get the structure for the + * running CPU and update the sum_exec_runtime field there. + */ +static inline void thread_group_cputime_account_exec_runtime( + struct thread_group_cputime *tgtimes, + unsigned long long ns) +{ + if (tgtimes->totals) { + struct task_cputime *times; + + times = per_cpu_ptr(tgtimes->totals, get_cpu()); + times->sum_exec_runtime += ns; + put_cpu_no_resched(); + } +} + +#else /* CONFIG_SMP */ + +static inline void thread_group_cputime_account_user( + struct thread_group_cputime *tgtimes, + cputime_t cputime) +{ + tgtimes->totals->utime = cputime_add(tgtimes->totals->utime, cputime); +} + +static inline void thread_group_cputime_account_system( + struct thread_group_cputime *tgtimes, + cputime_t cputime) +{ + tgtimes->totals->stime = cputime_add(tgtimes->totals->stime, cputime); +} + +static inline void thread_group_cputime_account_exec_runtime( + struct thread_group_cputime *tgtimes, + unsigned long long ns) +{ + tgtimes->totals->sum_exec_runtime += ns; +} + +#endif /* CONFIG_SMP */ + +/* + * These are the generic time-accounting routines that use the above + * functions. They are the functions actually called by the scheduler. + */ +static inline void account_group_user_time(struct task_struct *tsk, + cputime_t cputime) +{ + struct signal_struct *sig; + + sig = tsk->signal; + if (likely(sig)) + thread_group_cputime_account_user(&sig->cputime, cputime); +} + +static inline void account_group_system_time(struct task_struct *tsk, + cputime_t cputime) +{ + struct signal_struct *sig; + + sig = tsk->signal; + if (likely(sig)) + thread_group_cputime_account_system(&sig->cputime, cputime); +} + +static inline void account_group_exec_runtime(struct task_struct *tsk, + unsigned long long ns) +{ + struct signal_struct *sig; + + sig = tsk->signal; + if (likely(sig)) + thread_group_cputime_account_exec_runtime(&sig->cputime, ns); +} -- cgit v1.2.3 From 6bd00a61ab63d4ceb635ae0316353c11c900b8d8 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 28 Aug 2008 10:04:29 +0800 Subject: ACPI: introduce notifier change to avoid duplicates The battery driver already registers notification handler. To avoid registering notification handler again, introduce a notifier chain in global system notifier handler and use it in dock driver. Signed-off-by: Shaohua Li Signed-off-by: Len Brown --- drivers/acpi/bus.c | 15 +++++++++++++++ drivers/acpi/dock.c | 46 ++++++++++++++++++++++++---------------------- include/acpi/acpi_bus.h | 3 +++ 3 files changed, 42 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index ccae305ee55d..0dc44945725e 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -496,6 +496,19 @@ static int acpi_bus_check_scope(struct acpi_device *device) return 0; } +static BLOCKING_NOTIFIER_HEAD(acpi_bus_notify_list); +int register_acpi_bus_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&acpi_bus_notify_list, nb); +} +EXPORT_SYMBOL_GPL(register_acpi_bus_notifier); + +void unregister_acpi_bus_notifier(struct notifier_block *nb) +{ + blocking_notifier_chain_unregister(&acpi_bus_notify_list, nb); +} +EXPORT_SYMBOL_GPL(unregister_acpi_bus_notifier); + /** * acpi_bus_notify * --------------- @@ -506,6 +519,8 @@ static void acpi_bus_notify(acpi_handle handle, u32 type, void *data) int result = 0; struct acpi_device *device = NULL; + blocking_notifier_call_chain(&acpi_bus_notify_list, + type, (void *)handle); if (acpi_bus_get_device(handle, &device)) return; diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c index 799a0fdbb62d..2563bc62987d 100644 --- a/drivers/acpi/dock.c +++ b/drivers/acpi/dock.c @@ -748,6 +748,28 @@ static void dock_notify(acpi_handle handle, u32 event, void *data) } } +static int acpi_dock_notifier_call(struct notifier_block *this, + unsigned long event, void *data) +{ + struct dock_station *dock_station; + acpi_handle handle = (acpi_handle)data; + + if (event != ACPI_NOTIFY_BUS_CHECK && event != ACPI_NOTIFY_DEVICE_CHECK + && event != ACPI_NOTIFY_EJECT_REQUEST) + return 0; + list_for_each_entry(dock_station, &dock_stations, sibiling) { + if (dock_station->handle == handle) { + dock_notify(handle, event, dock_station); + return 0 ; + } + } + return 0; +} + +static struct notifier_block dock_acpi_notifier = { + .notifier_call = acpi_dock_notifier_call, +}; + /** * find_dock_devices - find devices on the dock station * @handle: the handle of the device we are examining @@ -861,7 +883,6 @@ static DEVICE_ATTR(uid, S_IRUGO, show_dock_uid, NULL); static int dock_add(acpi_handle handle) { int ret; - acpi_status status; struct dock_dependent_device *dd; struct dock_station *dock_station; struct platform_device *dock_device; @@ -956,23 +977,10 @@ static int dock_add(acpi_handle handle) } add_dock_dependent_device(dock_station, dd); - /* register for dock events */ - status = acpi_install_notify_handler(dock_station->handle, - ACPI_SYSTEM_NOTIFY, - dock_notify, dock_station); - - if (ACPI_FAILURE(status)) { - printk(KERN_ERR PREFIX "Error installing notify handler\n"); - ret = -ENODEV; - goto dock_add_err; - } - dock_station_count++; list_add(&dock_station->sibiling, &dock_stations); return 0; -dock_add_err: - kfree(dd); dock_add_err_unregister: device_remove_file(&dock_device->dev, &dev_attr_docked); device_remove_file(&dock_device->dev, &dev_attr_undock); @@ -990,7 +998,6 @@ dock_add_err_unregister: static int dock_remove(struct dock_station *dock_station) { struct dock_dependent_device *dd, *tmp; - acpi_status status; struct platform_device *dock_device = dock_station->dock_device; if (!dock_station_count) @@ -1001,13 +1008,6 @@ static int dock_remove(struct dock_station *dock_station) list) kfree(dd); - /* remove dock notify handler */ - status = acpi_remove_notify_handler(dock_station->handle, - ACPI_SYSTEM_NOTIFY, - dock_notify); - if (ACPI_FAILURE(status)) - printk(KERN_ERR "Error removing notify handler\n"); - /* cleanup sysfs */ device_remove_file(&dock_device->dev, &dev_attr_docked); device_remove_file(&dock_device->dev, &dev_attr_undock); @@ -1069,6 +1069,7 @@ static int __init dock_init(void) return 0; } + register_acpi_bus_notifier(&dock_acpi_notifier); printk(KERN_INFO PREFIX "%s: %d docks/bays found\n", ACPI_DOCK_DRIVER_DESCRIPTION, dock_station_count); return 0; @@ -1078,6 +1079,7 @@ static void __exit dock_exit(void) { struct dock_station *dock_station; + unregister_acpi_bus_notifier(&dock_acpi_notifier); list_for_each_entry(dock_station, &dock_stations, sibiling) dock_remove(dock_station); } diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index a5ac0bc7f52e..f74f882609f8 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -327,6 +327,9 @@ int acpi_bus_get_private_data(acpi_handle, void **); extern int acpi_notifier_call_chain(struct acpi_device *, u32, u32); extern int register_acpi_notifier(struct notifier_block *); extern int unregister_acpi_notifier(struct notifier_block *); + +extern int register_acpi_bus_notifier(struct notifier_block *nb); +extern void unregister_acpi_bus_notifier(struct notifier_block *nb); /* * External Functions */ -- cgit v1.2.3 From 19cd847ab24fefe9e50101ec94479e0400a08650 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Thu, 28 Aug 2008 10:05:06 +0800 Subject: ACPI: fix hotplug race The hotplug notification handler and drivers' notification handler all run in one workqueue. Before hotplug removes an acpi device, the device driver's notification handler is already be recorded to run just after global notification handler. After hotplug notification handler runs, acpica will notice a NULL notification handler and crash. So now we run run hotplug in another workqueue and wait for all acpi notication handlers finish. This was found in battery hotplug, but actually all hotplug can be affected. Signed-off-by: Zhang Rui Signed-off-by: Shaohua Li Signed-off-by: Len Brown --- drivers/acpi/dock.c | 25 ++++++++++++++++++++++++- drivers/acpi/osl.c | 46 +++++++++++++++++++++++++++++++++++++++++----- include/acpi/acpiosxf.h | 3 +++ 3 files changed, 68 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c index 2563bc62987d..4b395b1e61b2 100644 --- a/drivers/acpi/dock.c +++ b/drivers/acpi/dock.c @@ -748,6 +748,20 @@ static void dock_notify(acpi_handle handle, u32 event, void *data) } } +struct dock_data { + acpi_handle handle; + unsigned long event; + struct dock_station *ds; +}; + +static void acpi_dock_deferred_cb(void *context) +{ + struct dock_data *data = (struct dock_data *)context; + + dock_notify(data->handle, data->event, data->ds); + kfree(data); +} + static int acpi_dock_notifier_call(struct notifier_block *this, unsigned long event, void *data) { @@ -759,7 +773,16 @@ static int acpi_dock_notifier_call(struct notifier_block *this, return 0; list_for_each_entry(dock_station, &dock_stations, sibiling) { if (dock_station->handle == handle) { - dock_notify(handle, event, dock_station); + struct dock_data *dock_data; + + dock_data = kmalloc(sizeof(*dock_data), GFP_KERNEL); + if (!dock_data) + return 0; + dock_data->handle = handle; + dock_data->event = event; + dock_data->ds = dock_station; + acpi_os_hotplug_execute(acpi_dock_deferred_cb, + dock_data); return 0 ; } } diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 235a1386888a..750e0df15604 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -682,6 +682,22 @@ static void acpi_os_execute_deferred(struct work_struct *work) return; } +static void acpi_os_execute_hp_deferred(struct work_struct *work) +{ + struct acpi_os_dpc *dpc = container_of(work, struct acpi_os_dpc, work); + if (!dpc) { + printk(KERN_ERR PREFIX "Invalid (NULL) context\n"); + return; + } + + acpi_os_wait_events_complete(NULL); + + dpc->function(dpc->context); + kfree(dpc); + + return; +} + /******************************************************************************* * * FUNCTION: acpi_os_execute @@ -697,12 +713,13 @@ static void acpi_os_execute_deferred(struct work_struct *work) * ******************************************************************************/ -acpi_status acpi_os_execute(acpi_execute_type type, - acpi_osd_exec_callback function, void *context) +static acpi_status __acpi_os_execute(acpi_execute_type type, + acpi_osd_exec_callback function, void *context, int hp) { acpi_status status = AE_OK; struct acpi_os_dpc *dpc; struct workqueue_struct *queue; + int ret; ACPI_DEBUG_PRINT((ACPI_DB_EXEC, "Scheduling function [%p(%p)] for deferred execution.\n", function, context)); @@ -726,9 +743,17 @@ acpi_status acpi_os_execute(acpi_execute_type type, dpc->function = function; dpc->context = context; - INIT_WORK(&dpc->work, acpi_os_execute_deferred); - queue = (type == OSL_NOTIFY_HANDLER) ? kacpi_notify_wq : kacpid_wq; - if (!queue_work(queue, &dpc->work)) { + if (!hp) { + INIT_WORK(&dpc->work, acpi_os_execute_deferred); + queue = (type == OSL_NOTIFY_HANDLER) ? + kacpi_notify_wq : kacpid_wq; + ret = queue_work(queue, &dpc->work); + } else { + INIT_WORK(&dpc->work, acpi_os_execute_hp_deferred); + ret = schedule_work(&dpc->work); + } + + if (!ret) { ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Call to queue_work() failed.\n")); status = AE_ERROR; @@ -737,8 +762,19 @@ acpi_status acpi_os_execute(acpi_execute_type type, return_ACPI_STATUS(status); } +acpi_status acpi_os_execute(acpi_execute_type type, + acpi_osd_exec_callback function, void *context) +{ + return __acpi_os_execute(type, function, context, 0); +} EXPORT_SYMBOL(acpi_os_execute); +acpi_status acpi_os_hotplug_execute(acpi_osd_exec_callback function, + void *context) +{ + return __acpi_os_execute(0, function, context, 1); +} + void acpi_os_wait_events_complete(void *context) { flush_workqueue(kacpid_wq); diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h index 3f93a6b4e17f..b91440ac0d16 100644 --- a/include/acpi/acpiosxf.h +++ b/include/acpi/acpiosxf.h @@ -193,6 +193,9 @@ acpi_status acpi_os_execute(acpi_execute_type type, acpi_osd_exec_callback function, void *context); +acpi_status +acpi_os_hotplug_execute(acpi_osd_exec_callback function, void *context); + void acpi_os_wait_events_complete(void *context); void acpi_os_sleep(acpi_integer milliseconds); -- cgit v1.2.3 From 1253f7aabfebc51446dbec5c8895c5c8846dfe06 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 28 Aug 2008 10:06:16 +0800 Subject: dock: introduce .uevent for devices in dock, eg libata dock's uevent reported itself, not ata. It might be difficult to find an ata device just according to a dock. This patch introduces docking ops for each device in a dock. when docking, dock driver can send device specific uevent. This should help dock station too (not just bay) Signed-off-by: Shaohua Li Acked-by: Tejun Heo Signed-off-by: Len Brown --- drivers/acpi/dock.c | 22 +++++++++++++------ drivers/ata/libata-acpi.c | 44 ++++++++++++++++++++++++++++++++++++-- drivers/pci/hotplug/acpiphp_glue.c | 6 ++++-- include/acpi/acpi_drivers.h | 9 ++++++-- 4 files changed, 68 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c index f19f643fb362..ac7dfefcb50b 100644 --- a/drivers/acpi/dock.c +++ b/drivers/acpi/dock.c @@ -75,7 +75,7 @@ struct dock_dependent_device { struct list_head list; struct list_head hotplug_list; acpi_handle handle; - acpi_notify_handler handler; + struct acpi_dock_ops *ops; void *context; }; @@ -385,8 +385,8 @@ static void hotplug_dock_devices(struct dock_station *ds, u32 event) * First call driver specific hotplug functions */ list_for_each_entry(dd, &ds->hotplug_devices, hotplug_list) { - if (dd->handler) - dd->handler(dd->handle, event, dd->context); + if (dd->ops && dd->ops->handler) + dd->ops->handler(dd->handle, event, dd->context); } /* @@ -409,6 +409,7 @@ static void dock_event(struct dock_station *ds, u32 event, int num) struct device *dev = &ds->dock_device->dev; char event_string[13]; char *envp[] = { event_string, NULL }; + struct dock_dependent_device *dd; if (num == UNDOCK_EVENT) sprintf(event_string, "EVENT=undock"); @@ -419,7 +420,14 @@ static void dock_event(struct dock_station *ds, u32 event, int num) * Indicate that the status of the dock station has * changed. */ - kobject_uevent_env(&dev->kobj, KOBJ_CHANGE, envp); + if (num == DOCK_EVENT) + kobject_uevent_env(&dev->kobj, KOBJ_CHANGE, envp); + + list_for_each_entry(dd, &ds->hotplug_devices, hotplug_list) + if (dd->ops && dd->ops->uevent) + dd->ops->uevent(dd->handle, event, dd->context); + if (num != DOCK_EVENT) + kobject_uevent_env(&dev->kobj, KOBJ_CHANGE, envp); } /** @@ -588,7 +596,7 @@ EXPORT_SYMBOL_GPL(unregister_dock_notifier); /** * register_hotplug_dock_device - register a hotplug function * @handle: the handle of the device - * @handler: the acpi_notifier_handler to call after docking + * @ops: handlers to call after docking * @context: device specific data * * If a driver would like to perform a hotplug operation after a dock @@ -596,7 +604,7 @@ EXPORT_SYMBOL_GPL(unregister_dock_notifier); * the dock driver after _DCK is executed. */ int -register_hotplug_dock_device(acpi_handle handle, acpi_notify_handler handler, +register_hotplug_dock_device(acpi_handle handle, struct acpi_dock_ops *ops, void *context) { struct dock_dependent_device *dd; @@ -612,7 +620,7 @@ register_hotplug_dock_device(acpi_handle handle, acpi_notify_handler handler, list_for_each_entry(dock_station, &dock_stations, sibiling) { dd = find_dock_dependent_device(dock_station, handle); if (dd) { - dd->handler = handler; + dd->ops = ops; dd->context = context; dock_add_hotplug_device(dock_station, dd); return 0; diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c index 97727be7e158..c012307d0ba6 100644 --- a/drivers/ata/libata-acpi.c +++ b/drivers/ata/libata-acpi.c @@ -209,6 +209,46 @@ static void ata_acpi_ap_notify_dock(acpi_handle handle, u32 event, void *data) ata_acpi_handle_hotplug(ap, NULL, event); } +static void ata_acpi_uevent(struct ata_port *ap, struct ata_device *dev, + u32 event) +{ + struct kobject *kobj = NULL; + char event_string[20]; + char *envp[] = { event_string, NULL }; + + if (dev) { + if (dev->sdev) + kobj = &dev->sdev->sdev_gendev.kobj; + } else + kobj = &ap->dev->kobj; + + if (kobj) { + snprintf(event_string, 20, "BAY_EVENT=%d", event); + kobject_uevent_env(kobj, KOBJ_CHANGE, envp); + } +} + +static void ata_acpi_ap_uevent(acpi_handle handle, u32 event, void *data) +{ + ata_acpi_uevent(data, NULL, event); +} + +static void ata_acpi_dev_uevent(acpi_handle handle, u32 event, void *data) +{ + struct ata_device *dev = data; + ata_acpi_uevent(dev->link->ap, dev, event); +} + +static struct acpi_dock_ops ata_acpi_dev_dock_ops = { + .handler = ata_acpi_dev_notify_dock, + .uevent = ata_acpi_dev_uevent, +}; + +static struct acpi_dock_ops ata_acpi_ap_dock_ops = { + .handler = ata_acpi_ap_notify_dock, + .uevent = ata_acpi_ap_uevent, +}; + /** * ata_acpi_associate - associate ATA host with ACPI objects * @host: target ATA host @@ -244,7 +284,7 @@ void ata_acpi_associate(struct ata_host *host) if (ap->acpi_handle) { /* we might be on a docking station */ register_hotplug_dock_device(ap->acpi_handle, - ata_acpi_ap_notify_dock, ap); + &ata_acpi_ap_dock_ops, ap); } for (j = 0; j < ata_link_max_devices(&ap->link); j++) { @@ -253,7 +293,7 @@ void ata_acpi_associate(struct ata_host *host) if (dev->acpi_handle) { /* we might be on a docking station */ register_hotplug_dock_device(dev->acpi_handle, - ata_acpi_dev_notify_dock, dev); + &ata_acpi_dev_dock_ops, dev); } } } diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index a3e4705dd8f0..db54c5ef2aa5 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -169,7 +169,9 @@ static int post_dock_fixups(struct notifier_block *nb, unsigned long val, } - +static struct acpi_dock_ops acpiphp_dock_ops = { + .handler = handle_hotplug_event_func, +}; /* callback routine to register each ACPI PCI slot object */ static acpi_status @@ -285,7 +287,7 @@ register_slot(acpi_handle handle, u32 lvl, void *context, void **rv) */ newfunc->flags &= ~FUNC_HAS_EJ0; if (register_hotplug_dock_device(handle, - handle_hotplug_event_func, newfunc)) + &acpiphp_dock_ops, newfunc)) dbg("failed to register dock device\n"); /* we need to be notified when dock events happen diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h index e5f38e5ce86f..4f5042a0ef80 100644 --- a/include/acpi/acpi_drivers.h +++ b/include/acpi/acpi_drivers.h @@ -115,12 +115,17 @@ int acpi_processor_set_thermal_limit(acpi_handle handle, int type); /*-------------------------------------------------------------------------- Dock Station -------------------------------------------------------------------------- */ +struct acpi_dock_ops { + acpi_notify_handler handler; + acpi_notify_handler uevent; +}; + #if defined(CONFIG_ACPI_DOCK) || defined(CONFIG_ACPI_DOCK_MODULE) extern int is_dock_device(acpi_handle handle); extern int register_dock_notifier(struct notifier_block *nb); extern void unregister_dock_notifier(struct notifier_block *nb); extern int register_hotplug_dock_device(acpi_handle handle, - acpi_notify_handler handler, + struct acpi_dock_ops *ops, void *context); extern void unregister_hotplug_dock_device(acpi_handle handle); #else @@ -136,7 +141,7 @@ static inline void unregister_dock_notifier(struct notifier_block *nb) { } static inline int register_hotplug_dock_device(acpi_handle handle, - acpi_notify_handler handler, + struct acpi_dock_ops *ops, void *context) { return -ENODEV; -- cgit v1.2.3 From 5a9fa73072854981a5c05eb7ba18a96d49c2804f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 22 Sep 2008 14:42:50 -0700 Subject: posix-timers: kill ->it_sigev_signo and ->it_sigev_value With the recent changes ->it_sigev_signo and ->it_sigev_value are only used in sys_timer_create(), kill them. Signed-off-by: Oleg Nesterov Cc: mingo@elte.hu Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- include/linux/posix-timers.h | 2 -- kernel/posix-timers.c | 17 +++++++---------- 2 files changed, 7 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index f9d8e9e94e9b..a7c721355549 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -45,8 +45,6 @@ struct k_itimer { int it_requeue_pending; /* waiting to requeue this timer */ #define REQUEUE_PENDING 1 int it_sigev_notify; /* notify word of sigevent struct */ - int it_sigev_signo; /* signo word of sigevent struct */ - sigval_t it_sigev_value; /* value word of sigevent struct */ struct task_struct *it_process; /* process to send signal to */ struct sigqueue *sigq; /* signal queue entry. */ union { diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 7be385fe4eca..3eff47b0d8d5 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -510,10 +510,6 @@ sys_timer_create(const clockid_t which_clock, error = -EFAULT; goto out; } - new_timer->it_sigev_notify = event.sigev_notify; - new_timer->it_sigev_signo = event.sigev_signo; - new_timer->it_sigev_value = event.sigev_value; - rcu_read_lock(); process = good_sigevent(&event); if (process) @@ -524,17 +520,18 @@ sys_timer_create(const clockid_t which_clock, goto out; } } else { - new_timer->it_sigev_notify = SIGEV_SIGNAL; - new_timer->it_sigev_signo = SIGALRM; - new_timer->it_sigev_value.sival_int = new_timer->it_id; + event.sigev_notify = SIGEV_SIGNAL; + event.sigev_signo = SIGALRM; + event.sigev_value.sival_int = new_timer->it_id; process = current->group_leader; get_task_struct(process); } - new_timer->sigq->info.si_code = SI_TIMER; + new_timer->it_sigev_notify = event.sigev_notify; + new_timer->sigq->info.si_signo = event.sigev_signo; + new_timer->sigq->info.si_value = event.sigev_value; new_timer->sigq->info.si_tid = new_timer->it_id; - new_timer->sigq->info.si_signo = new_timer->it_sigev_signo; - new_timer->sigq->info.si_value = new_timer->it_sigev_value; + new_timer->sigq->info.si_code = SI_TIMER; spin_lock_irq(¤t->sighand->siglock); new_timer->it_process = process; -- cgit v1.2.3 From 1b02469088ac7a13d7e622b618b7410d0f1ce5ec Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Mon, 22 Sep 2008 14:42:43 -0700 Subject: hrtimer: reorder struct hrtimer to save 8 bytes on 64bit builds reorder struct hrtimer to save 8 bytes on 64 bit builds when CONFIG_TIMER_STATS selected. (also removes 8 bytes from signal_struct) Signed-off-by: Richard Kennedy Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 68b0196d8696..8730b60c9432 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -115,12 +115,12 @@ struct hrtimer { enum hrtimer_restart (*function)(struct hrtimer *); struct hrtimer_clock_base *base; unsigned long state; - enum hrtimer_cb_mode cb_mode; struct list_head cb_entry; + enum hrtimer_cb_mode cb_mode; #ifdef CONFIG_TIMER_STATS + int start_pid; void *start_site; char start_comm[16]; - int start_pid; #endif }; -- cgit v1.2.3 From d40e944c25fb4642adb2a4c580a48218a9f3f824 Mon Sep 17 00:00:00 2001 From: Roman Zippel Date: Mon, 22 Sep 2008 14:42:44 -0700 Subject: ntp: improve adjtimex frequency rounding Change PPM_SCALE_INV_SHIFT so that it doesn't throw away any input bits (19 is the amount of the factor 2 in PPM_SCALE), the output frequency can then be calculated back to its input value, as the inverse divide produce a slightly larger value, which is then correctly rounded by the final shift. Reported-by: Martin Ziegler Signed-off-by: Roman Zippel Cc: John Stultz Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- include/linux/timex.h | 2 +- kernel/time/ntp.c | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/timex.h b/include/linux/timex.h index c00bcdd3ae42..9007313b5b71 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -82,7 +82,7 @@ */ #define SHIFT_USEC 16 /* frequency offset scale (shift) */ #define PPM_SCALE (NSEC_PER_USEC << (NTP_SCALE_SHIFT - SHIFT_USEC)) -#define PPM_SCALE_INV_SHIFT 20 +#define PPM_SCALE_INV_SHIFT 19 #define PPM_SCALE_INV ((1ll << (PPM_SCALE_INV_SHIFT + NTP_SCALE_SHIFT)) / \ PPM_SCALE + 1) diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 450a45cb01c1..ddb0465a6baa 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -406,9 +406,8 @@ adj_done: if (time_status & (STA_UNSYNC|STA_CLOCKERR)) result = TIME_ERROR; - txc->freq = shift_right((s32)(time_freq >> PPM_SCALE_INV_SHIFT) * - (s64)PPM_SCALE_INV, - NTP_SCALE_SHIFT); + txc->freq = shift_right((time_freq >> PPM_SCALE_INV_SHIFT) * + (s64)PPM_SCALE_INV, NTP_SCALE_SHIFT); txc->maxerror = time_maxerror; txc->esterror = time_esterror; txc->status = time_status; -- cgit v1.2.3 From e416de5e61e1a9b7f987804cbb67230b5f5293c6 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Tue, 23 Sep 2008 17:25:10 +0100 Subject: Export the ROM enable/disable helpers .... so that they can be used by MTD map drivers. Lets us close #9420 Signed-off-by: Alan Cox Signed-off-by: David Woodhouse --- drivers/pci/rom.c | 6 ++++-- include/linux/pci.h | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/pci/rom.c b/drivers/pci/rom.c index bd5c0e031398..1f5f6143f35c 100644 --- a/drivers/pci/rom.c +++ b/drivers/pci/rom.c @@ -21,7 +21,7 @@ * between the ROM and other resources, so enabling it may disable access * to MMIO registers or other card memory. */ -static int pci_enable_rom(struct pci_dev *pdev) +int pci_enable_rom(struct pci_dev *pdev) { struct resource *res = pdev->resource + PCI_ROM_RESOURCE; struct pci_bus_region region; @@ -45,7 +45,7 @@ static int pci_enable_rom(struct pci_dev *pdev) * Disable ROM decoding on a PCI device by turning off the last bit in the * ROM BAR. */ -static void pci_disable_rom(struct pci_dev *pdev) +void pci_disable_rom(struct pci_dev *pdev) { u32 rom_addr; pci_read_config_dword(pdev, pdev->rom_base_reg, &rom_addr); @@ -260,3 +260,5 @@ void pci_cleanup_rom(struct pci_dev *pdev) EXPORT_SYMBOL(pci_map_rom); EXPORT_SYMBOL(pci_unmap_rom); +EXPORT_SYMBOL_GPL(pci_enable_rom); +EXPORT_SYMBOL_GPL(pci_disable_rom); diff --git a/include/linux/pci.h b/include/linux/pci.h index c0e14008a3c2..7a4cee00c1d6 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -631,6 +631,8 @@ int __must_check pci_assign_resource(struct pci_dev *dev, int i); int pci_select_bars(struct pci_dev *dev, unsigned long flags); /* ROM control related routines */ +int pci_enable_rom(struct pci_dev *pdev); +void pci_disable_rom(struct pci_dev *pdev); void __iomem __must_check *pci_map_rom(struct pci_dev *pdev, size_t *size); void pci_unmap_rom(struct pci_dev *pdev, void __iomem *rom); size_t pci_get_rom_size(void __iomem *rom, size_t size); -- cgit v1.2.3 From 7086efe1c1536f6bc160e7d60a9bfd645b91f279 Mon Sep 17 00:00:00 2001 From: Frank Mayhar Date: Fri, 12 Sep 2008 09:54:39 -0700 Subject: timers: fix itimer/many thread hang, v3 - fix UP lockup - another set of UP/SMP cleanups and simplifications Signed-off-by: Frank Mayhar Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - kernel/sched.c | 1 - kernel/sched_stats.h | 126 +++++++++++++++----------------------------------- 3 files changed, 38 insertions(+), 90 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index b982fb48c8f0..23d9d5464544 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2134,7 +2134,6 @@ static inline int thread_group_cputime_clone_thread(struct task_struct *curr) return thread_group_cputime_alloc(curr); } - static inline void thread_group_cputime_free(struct signal_struct *sig) { free_percpu(sig->cputime.totals); diff --git a/kernel/sched.c b/kernel/sched.c index 260c22cc530a..29a3152c45db 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4046,7 +4046,6 @@ unsigned long long task_delta_exec(struct task_struct *p) unsigned long flags; u64 ns = 0; - rq = task_rq_lock(p, &flags); if (task_current(rq, p)) { u64 delta_exec; diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h index d6903bd0c7a8..b8c156979cf2 100644 --- a/kernel/sched_stats.h +++ b/kernel/sched_stats.h @@ -276,133 +276,83 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next) * on CONFIG_SCHEDSTATS. */ -#ifdef CONFIG_SMP - /** - * thread_group_cputime_account_user - Maintain utime for a thread group. + * account_group_user_time - Maintain utime for a thread group. * - * @tgtimes: Pointer to thread_group_cputime structure. - * @cputime: Time value by which to increment the utime field of that - * structure. + * @tsk: Pointer to task structure. + * @cputime: Time value by which to increment the utime field of the + * thread_group_cputime structure. * * If thread group time is being maintained, get the structure for the * running CPU and update the utime field there. */ -static inline void thread_group_cputime_account_user( - struct thread_group_cputime *tgtimes, - cputime_t cputime) +static inline void account_group_user_time(struct task_struct *tsk, + cputime_t cputime) { - if (tgtimes->totals) { + struct signal_struct *sig; + + sig = tsk->signal; + if (unlikely(!sig)) + return; + if (sig->cputime.totals) { struct task_cputime *times; - times = per_cpu_ptr(tgtimes->totals, get_cpu()); + times = per_cpu_ptr(sig->cputime.totals, get_cpu()); times->utime = cputime_add(times->utime, cputime); put_cpu_no_resched(); } } /** - * thread_group_cputime_account_system - Maintain stime for a thread group. + * account_group_system_time - Maintain stime for a thread group. * - * @tgtimes: Pointer to thread_group_cputime structure. - * @cputime: Time value by which to increment the stime field of that - * structure. + * @tsk: Pointer to task structure. + * @cputime: Time value by which to increment the stime field of the + * thread_group_cputime structure. * * If thread group time is being maintained, get the structure for the * running CPU and update the stime field there. */ -static inline void thread_group_cputime_account_system( - struct thread_group_cputime *tgtimes, - cputime_t cputime) +static inline void account_group_system_time(struct task_struct *tsk, + cputime_t cputime) { - if (tgtimes->totals) { + struct signal_struct *sig; + + sig = tsk->signal; + if (unlikely(!sig)) + return; + if (sig->cputime.totals) { struct task_cputime *times; - times = per_cpu_ptr(tgtimes->totals, get_cpu()); + times = per_cpu_ptr(sig->cputime.totals, get_cpu()); times->stime = cputime_add(times->stime, cputime); put_cpu_no_resched(); } } /** - * thread_group_cputime_account_exec_runtime - Maintain exec runtime for a - * thread group. + * account_group_exec_runtime - Maintain exec runtime for a thread group. * - * @tgtimes: Pointer to thread_group_cputime structure. + * @tsk: Pointer to task structure. * @ns: Time value by which to increment the sum_exec_runtime field - * of that structure. + * of the thread_group_cputime structure. * * If thread group time is being maintained, get the structure for the * running CPU and update the sum_exec_runtime field there. */ -static inline void thread_group_cputime_account_exec_runtime( - struct thread_group_cputime *tgtimes, - unsigned long long ns) +static inline void account_group_exec_runtime(struct task_struct *tsk, + unsigned long long ns) { - if (tgtimes->totals) { + struct signal_struct *sig; + + sig = tsk->signal; + if (unlikely(!sig)) + return; + if (sig->cputime.totals) { struct task_cputime *times; - times = per_cpu_ptr(tgtimes->totals, get_cpu()); + times = per_cpu_ptr(sig->cputime.totals, get_cpu()); times->sum_exec_runtime += ns; put_cpu_no_resched(); } } - -#else /* CONFIG_SMP */ - -static inline void thread_group_cputime_account_user( - struct thread_group_cputime *tgtimes, - cputime_t cputime) -{ - tgtimes->totals->utime = cputime_add(tgtimes->totals->utime, cputime); -} - -static inline void thread_group_cputime_account_system( - struct thread_group_cputime *tgtimes, - cputime_t cputime) -{ - tgtimes->totals->stime = cputime_add(tgtimes->totals->stime, cputime); -} - -static inline void thread_group_cputime_account_exec_runtime( - struct thread_group_cputime *tgtimes, - unsigned long long ns) -{ - tgtimes->totals->sum_exec_runtime += ns; -} - -#endif /* CONFIG_SMP */ - -/* - * These are the generic time-accounting routines that use the above - * functions. They are the functions actually called by the scheduler. - */ -static inline void account_group_user_time(struct task_struct *tsk, - cputime_t cputime) -{ - struct signal_struct *sig; - - sig = tsk->signal; - if (likely(sig)) - thread_group_cputime_account_user(&sig->cputime, cputime); -} - -static inline void account_group_system_time(struct task_struct *tsk, - cputime_t cputime) -{ - struct signal_struct *sig; - - sig = tsk->signal; - if (likely(sig)) - thread_group_cputime_account_system(&sig->cputime, cputime); -} - -static inline void account_group_exec_runtime(struct task_struct *tsk, - unsigned long long ns) -{ - struct signal_struct *sig; - - sig = tsk->signal; - if (likely(sig)) - thread_group_cputime_account_exec_runtime(&sig->cputime, ns); -} -- cgit v1.2.3 From bbfbd8b151fe35c9a1180a7f5254c5d6b8387cc0 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Wed, 1 Oct 2008 16:13:54 +0900 Subject: sh: Move the shared INTC code out to drivers/sh/ The INTC code will be re-used across different architectures, so move this out to drivers/sh/ and include/linux/sh_intc.h respectively. Signed-off-by: Paul Mundt --- arch/sh/include/asm/hw_irq.h | 92 +----- arch/sh/kernel/cpu/irq/Makefile | 2 - arch/sh/kernel/cpu/irq/intc.c | 712 --------------------------------------- drivers/sh/Makefile | 2 +- drivers/sh/intc.c | 713 ++++++++++++++++++++++++++++++++++++++++ include/linux/sh_intc.h | 91 +++++ 6 files changed, 807 insertions(+), 805 deletions(-) delete mode 100644 arch/sh/kernel/cpu/irq/intc.c create mode 100644 drivers/sh/intc.c create mode 100644 include/linux/sh_intc.h (limited to 'include') diff --git a/arch/sh/include/asm/hw_irq.h b/arch/sh/include/asm/hw_irq.h index d557b00111bf..603cdde813d1 100644 --- a/arch/sh/include/asm/hw_irq.h +++ b/arch/sh/include/asm/hw_irq.h @@ -2,6 +2,7 @@ #define __ASM_SH_HW_IRQ_H #include +#include #include extern atomic_t irq_err_count; @@ -23,101 +24,12 @@ struct ipr_desc { void register_ipr_controller(struct ipr_desc *); -typedef unsigned char intc_enum; - -struct intc_vect { - intc_enum enum_id; - unsigned short vect; -}; - -#define INTC_VECT(enum_id, vect) { enum_id, vect } -#define INTC_IRQ(enum_id, irq) INTC_VECT(enum_id, irq2evt(irq)) - -struct intc_group { - intc_enum enum_id; - intc_enum enum_ids[32]; -}; - -#define INTC_GROUP(enum_id, ids...) { enum_id, { ids } } - -struct intc_mask_reg { - unsigned long set_reg, clr_reg, reg_width; - intc_enum enum_ids[32]; -#ifdef CONFIG_SMP - unsigned long smp; -#endif -}; - -struct intc_prio_reg { - unsigned long set_reg, clr_reg, reg_width, field_width; - intc_enum enum_ids[16]; -#ifdef CONFIG_SMP - unsigned long smp; -#endif -}; - -struct intc_sense_reg { - unsigned long reg, reg_width, field_width; - intc_enum enum_ids[16]; -}; - -#ifdef CONFIG_SMP -#define INTC_SMP(stride, nr) .smp = (stride) | ((nr) << 8) -#else -#define INTC_SMP(stride, nr) -#endif - -struct intc_desc { - struct intc_vect *vectors; - unsigned int nr_vectors; - struct intc_group *groups; - unsigned int nr_groups; - struct intc_mask_reg *mask_regs; - unsigned int nr_mask_regs; - struct intc_prio_reg *prio_regs; - unsigned int nr_prio_regs; - struct intc_sense_reg *sense_regs; - unsigned int nr_sense_regs; - char *name; -#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A) - struct intc_mask_reg *ack_regs; - unsigned int nr_ack_regs; -#endif -}; - -#define _INTC_ARRAY(a) a, sizeof(a)/sizeof(*a) -#define DECLARE_INTC_DESC(symbol, chipname, vectors, groups, \ - mask_regs, prio_regs, sense_regs) \ -struct intc_desc symbol __initdata = { \ - _INTC_ARRAY(vectors), _INTC_ARRAY(groups), \ - _INTC_ARRAY(mask_regs), _INTC_ARRAY(prio_regs), \ - _INTC_ARRAY(sense_regs), \ - chipname, \ -} - -#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A) -#define DECLARE_INTC_DESC_ACK(symbol, chipname, vectors, groups, \ - mask_regs, prio_regs, sense_regs, ack_regs) \ -struct intc_desc symbol __initdata = { \ - _INTC_ARRAY(vectors), _INTC_ARRAY(groups), \ - _INTC_ARRAY(mask_regs), _INTC_ARRAY(prio_regs), \ - _INTC_ARRAY(sense_regs), \ - chipname, \ - _INTC_ARRAY(ack_regs), \ -} -#endif - -void __init register_intc_controller(struct intc_desc *desc); -int intc_set_priority(unsigned int irq, unsigned int prio); - void __init plat_irq_setup(void); -#ifdef CONFIG_CPU_SH3 void __init plat_irq_setup_sh3(void); -#endif +void __init plat_irq_setup_pins(int mode); enum { IRQ_MODE_IRQ, IRQ_MODE_IRQ7654, IRQ_MODE_IRQ3210, IRQ_MODE_IRL7654_MASK, IRQ_MODE_IRL3210_MASK, IRQ_MODE_IRL7654, IRQ_MODE_IRL3210 }; -void __init plat_irq_setup_pins(int mode); #endif /* __ASM_SH_HW_IRQ_H */ diff --git a/arch/sh/kernel/cpu/irq/Makefile b/arch/sh/kernel/cpu/irq/Makefile index 462a8f6dfee2..f0c7025a67d1 100644 --- a/arch/sh/kernel/cpu/irq/Makefile +++ b/arch/sh/kernel/cpu/irq/Makefile @@ -1,8 +1,6 @@ # # Makefile for the Linux/SuperH CPU-specifc IRQ handlers. # -obj-y += intc.o - obj-$(CONFIG_SUPERH32) += imask.o obj-$(CONFIG_CPU_SH5) += intc-sh5.o obj-$(CONFIG_CPU_HAS_IPR_IRQ) += ipr.o diff --git a/arch/sh/kernel/cpu/irq/intc.c b/arch/sh/kernel/cpu/irq/intc.c deleted file mode 100644 index 138efa4e95db..000000000000 --- a/arch/sh/kernel/cpu/irq/intc.c +++ /dev/null @@ -1,712 +0,0 @@ -/* - * Shared interrupt handling code for IPR and INTC2 types of IRQs. - * - * Copyright (C) 2007, 2008 Magnus Damm - * - * Based on intc2.c and ipr.c - * - * Copyright (C) 1999 Niibe Yutaka & Takeshi Yaegashi - * Copyright (C) 2000 Kazumoto Kojima - * Copyright (C) 2001 David J. Mckay (david.mckay@st.com) - * Copyright (C) 2003 Takashi Kusuda - * Copyright (C) 2005, 2006 Paul Mundt - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - */ -#include -#include -#include -#include -#include -#include - -#define _INTC_MK(fn, mode, addr_e, addr_d, width, shift) \ - ((shift) | ((width) << 5) | ((fn) << 9) | ((mode) << 13) | \ - ((addr_e) << 16) | ((addr_d << 24))) - -#define _INTC_SHIFT(h) (h & 0x1f) -#define _INTC_WIDTH(h) ((h >> 5) & 0xf) -#define _INTC_FN(h) ((h >> 9) & 0xf) -#define _INTC_MODE(h) ((h >> 13) & 0x7) -#define _INTC_ADDR_E(h) ((h >> 16) & 0xff) -#define _INTC_ADDR_D(h) ((h >> 24) & 0xff) - -struct intc_handle_int { - unsigned int irq; - unsigned long handle; -}; - -struct intc_desc_int { - unsigned long *reg; -#ifdef CONFIG_SMP - unsigned long *smp; -#endif - unsigned int nr_reg; - struct intc_handle_int *prio; - unsigned int nr_prio; - struct intc_handle_int *sense; - unsigned int nr_sense; - struct irq_chip chip; -}; - -#ifdef CONFIG_SMP -#define IS_SMP(x) x.smp -#define INTC_REG(d, x, c) (d->reg[(x)] + ((d->smp[(x)] & 0xff) * c)) -#define SMP_NR(d, x) ((d->smp[(x)] >> 8) ? (d->smp[(x)] >> 8) : 1) -#else -#define IS_SMP(x) 0 -#define INTC_REG(d, x, c) (d->reg[(x)]) -#define SMP_NR(d, x) 1 -#endif - -static unsigned int intc_prio_level[NR_IRQS]; /* for now */ -#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A) -static unsigned long ack_handle[NR_IRQS]; -#endif - -static inline struct intc_desc_int *get_intc_desc(unsigned int irq) -{ - struct irq_chip *chip = get_irq_chip(irq); - return (void *)((char *)chip - offsetof(struct intc_desc_int, chip)); -} - -static inline unsigned int set_field(unsigned int value, - unsigned int field_value, - unsigned int handle) -{ - unsigned int width = _INTC_WIDTH(handle); - unsigned int shift = _INTC_SHIFT(handle); - - value &= ~(((1 << width) - 1) << shift); - value |= field_value << shift; - return value; -} - -static void write_8(unsigned long addr, unsigned long h, unsigned long data) -{ - __raw_writeb(set_field(0, data, h), addr); -} - -static void write_16(unsigned long addr, unsigned long h, unsigned long data) -{ - __raw_writew(set_field(0, data, h), addr); -} - -static void write_32(unsigned long addr, unsigned long h, unsigned long data) -{ - __raw_writel(set_field(0, data, h), addr); -} - -static void modify_8(unsigned long addr, unsigned long h, unsigned long data) -{ - unsigned long flags; - local_irq_save(flags); - __raw_writeb(set_field(__raw_readb(addr), data, h), addr); - local_irq_restore(flags); -} - -static void modify_16(unsigned long addr, unsigned long h, unsigned long data) -{ - unsigned long flags; - local_irq_save(flags); - __raw_writew(set_field(__raw_readw(addr), data, h), addr); - local_irq_restore(flags); -} - -static void modify_32(unsigned long addr, unsigned long h, unsigned long data) -{ - unsigned long flags; - local_irq_save(flags); - __raw_writel(set_field(__raw_readl(addr), data, h), addr); - local_irq_restore(flags); -} - -enum { REG_FN_ERR = 0, REG_FN_WRITE_BASE = 1, REG_FN_MODIFY_BASE = 5 }; - -static void (*intc_reg_fns[])(unsigned long addr, - unsigned long h, - unsigned long data) = { - [REG_FN_WRITE_BASE + 0] = write_8, - [REG_FN_WRITE_BASE + 1] = write_16, - [REG_FN_WRITE_BASE + 3] = write_32, - [REG_FN_MODIFY_BASE + 0] = modify_8, - [REG_FN_MODIFY_BASE + 1] = modify_16, - [REG_FN_MODIFY_BASE + 3] = modify_32, -}; - -enum { MODE_ENABLE_REG = 0, /* Bit(s) set -> interrupt enabled */ - MODE_MASK_REG, /* Bit(s) set -> interrupt disabled */ - MODE_DUAL_REG, /* Two registers, set bit to enable / disable */ - MODE_PRIO_REG, /* Priority value written to enable interrupt */ - MODE_PCLR_REG, /* Above plus all bits set to disable interrupt */ -}; - -static void intc_mode_field(unsigned long addr, - unsigned long handle, - void (*fn)(unsigned long, - unsigned long, - unsigned long), - unsigned int irq) -{ - fn(addr, handle, ((1 << _INTC_WIDTH(handle)) - 1)); -} - -static void intc_mode_zero(unsigned long addr, - unsigned long handle, - void (*fn)(unsigned long, - unsigned long, - unsigned long), - unsigned int irq) -{ - fn(addr, handle, 0); -} - -static void intc_mode_prio(unsigned long addr, - unsigned long handle, - void (*fn)(unsigned long, - unsigned long, - unsigned long), - unsigned int irq) -{ - fn(addr, handle, intc_prio_level[irq]); -} - -static void (*intc_enable_fns[])(unsigned long addr, - unsigned long handle, - void (*fn)(unsigned long, - unsigned long, - unsigned long), - unsigned int irq) = { - [MODE_ENABLE_REG] = intc_mode_field, - [MODE_MASK_REG] = intc_mode_zero, - [MODE_DUAL_REG] = intc_mode_field, - [MODE_PRIO_REG] = intc_mode_prio, - [MODE_PCLR_REG] = intc_mode_prio, -}; - -static void (*intc_disable_fns[])(unsigned long addr, - unsigned long handle, - void (*fn)(unsigned long, - unsigned long, - unsigned long), - unsigned int irq) = { - [MODE_ENABLE_REG] = intc_mode_zero, - [MODE_MASK_REG] = intc_mode_field, - [MODE_DUAL_REG] = intc_mode_field, - [MODE_PRIO_REG] = intc_mode_zero, - [MODE_PCLR_REG] = intc_mode_field, -}; - -static inline void _intc_enable(unsigned int irq, unsigned long handle) -{ - struct intc_desc_int *d = get_intc_desc(irq); - unsigned long addr; - unsigned int cpu; - - for (cpu = 0; cpu < SMP_NR(d, _INTC_ADDR_E(handle)); cpu++) { - addr = INTC_REG(d, _INTC_ADDR_E(handle), cpu); - intc_enable_fns[_INTC_MODE(handle)](addr, handle, intc_reg_fns\ - [_INTC_FN(handle)], irq); - } -} - -static void intc_enable(unsigned int irq) -{ - _intc_enable(irq, (unsigned long)get_irq_chip_data(irq)); -} - -static void intc_disable(unsigned int irq) -{ - struct intc_desc_int *d = get_intc_desc(irq); - unsigned long handle = (unsigned long) get_irq_chip_data(irq); - unsigned long addr; - unsigned int cpu; - - for (cpu = 0; cpu < SMP_NR(d, _INTC_ADDR_D(handle)); cpu++) { - addr = INTC_REG(d, _INTC_ADDR_D(handle), cpu); - intc_disable_fns[_INTC_MODE(handle)](addr, handle,intc_reg_fns\ - [_INTC_FN(handle)], irq); - } -} - -#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A) -static void intc_mask_ack(unsigned int irq) -{ - struct intc_desc_int *d = get_intc_desc(irq); - unsigned long handle = ack_handle[irq]; - unsigned long addr; - - intc_disable(irq); - - /* read register and write zero only to the assocaited bit */ - - if (handle) { - addr = INTC_REG(d, _INTC_ADDR_D(handle), 0); - switch (_INTC_FN(handle)) { - case REG_FN_MODIFY_BASE + 0: /* 8bit */ - __raw_readb(addr); - __raw_writeb(0xff ^ set_field(0, 1, handle), addr); - break; - case REG_FN_MODIFY_BASE + 1: /* 16bit */ - __raw_readw(addr); - __raw_writew(0xffff ^ set_field(0, 1, handle), addr); - break; - case REG_FN_MODIFY_BASE + 3: /* 32bit */ - __raw_readl(addr); - __raw_writel(0xffffffff ^ set_field(0, 1, handle), addr); - break; - default: - BUG(); - break; - } - } -} -#endif - -static struct intc_handle_int *intc_find_irq(struct intc_handle_int *hp, - unsigned int nr_hp, - unsigned int irq) -{ - int i; - - /* this doesn't scale well, but... - * - * this function should only be used for cerain uncommon - * operations such as intc_set_priority() and intc_set_sense() - * and in those rare cases performance doesn't matter that much. - * keeping the memory footprint low is more important. - * - * one rather simple way to speed this up and still keep the - * memory footprint down is to make sure the array is sorted - * and then perform a bisect to lookup the irq. - */ - - for (i = 0; i < nr_hp; i++) { - if ((hp + i)->irq != irq) - continue; - - return hp + i; - } - - return NULL; -} - -int intc_set_priority(unsigned int irq, unsigned int prio) -{ - struct intc_desc_int *d = get_intc_desc(irq); - struct intc_handle_int *ihp; - - if (!intc_prio_level[irq] || prio <= 1) - return -EINVAL; - - ihp = intc_find_irq(d->prio, d->nr_prio, irq); - if (ihp) { - if (prio >= (1 << _INTC_WIDTH(ihp->handle))) - return -EINVAL; - - intc_prio_level[irq] = prio; - - /* - * only set secondary masking method directly - * primary masking method is using intc_prio_level[irq] - * priority level will be set during next enable() - */ - - if (_INTC_FN(ihp->handle) != REG_FN_ERR) - _intc_enable(irq, ihp->handle); - } - return 0; -} - -#define VALID(x) (x | 0x80) - -static unsigned char intc_irq_sense_table[IRQ_TYPE_SENSE_MASK + 1] = { - [IRQ_TYPE_EDGE_FALLING] = VALID(0), - [IRQ_TYPE_EDGE_RISING] = VALID(1), - [IRQ_TYPE_LEVEL_LOW] = VALID(2), - /* SH7706, SH7707 and SH7709 do not support high level triggered */ -#if !defined(CONFIG_CPU_SUBTYPE_SH7706) && \ - !defined(CONFIG_CPU_SUBTYPE_SH7707) && \ - !defined(CONFIG_CPU_SUBTYPE_SH7709) - [IRQ_TYPE_LEVEL_HIGH] = VALID(3), -#endif -}; - -static int intc_set_sense(unsigned int irq, unsigned int type) -{ - struct intc_desc_int *d = get_intc_desc(irq); - unsigned char value = intc_irq_sense_table[type & IRQ_TYPE_SENSE_MASK]; - struct intc_handle_int *ihp; - unsigned long addr; - - if (!value) - return -EINVAL; - - ihp = intc_find_irq(d->sense, d->nr_sense, irq); - if (ihp) { - addr = INTC_REG(d, _INTC_ADDR_E(ihp->handle), 0); - intc_reg_fns[_INTC_FN(ihp->handle)](addr, ihp->handle, value); - } - return 0; -} - -static unsigned int __init intc_get_reg(struct intc_desc_int *d, - unsigned long address) -{ - unsigned int k; - - for (k = 0; k < d->nr_reg; k++) { - if (d->reg[k] == address) - return k; - } - - BUG(); - return 0; -} - -static intc_enum __init intc_grp_id(struct intc_desc *desc, - intc_enum enum_id) -{ - struct intc_group *g = desc->groups; - unsigned int i, j; - - for (i = 0; g && enum_id && i < desc->nr_groups; i++) { - g = desc->groups + i; - - for (j = 0; g->enum_ids[j]; j++) { - if (g->enum_ids[j] != enum_id) - continue; - - return g->enum_id; - } - } - - return 0; -} - -static unsigned int __init intc_mask_data(struct intc_desc *desc, - struct intc_desc_int *d, - intc_enum enum_id, int do_grps) -{ - struct intc_mask_reg *mr = desc->mask_regs; - unsigned int i, j, fn, mode; - unsigned long reg_e, reg_d; - - for (i = 0; mr && enum_id && i < desc->nr_mask_regs; i++) { - mr = desc->mask_regs + i; - - for (j = 0; j < ARRAY_SIZE(mr->enum_ids); j++) { - if (mr->enum_ids[j] != enum_id) - continue; - - if (mr->set_reg && mr->clr_reg) { - fn = REG_FN_WRITE_BASE; - mode = MODE_DUAL_REG; - reg_e = mr->clr_reg; - reg_d = mr->set_reg; - } else { - fn = REG_FN_MODIFY_BASE; - if (mr->set_reg) { - mode = MODE_ENABLE_REG; - reg_e = mr->set_reg; - reg_d = mr->set_reg; - } else { - mode = MODE_MASK_REG; - reg_e = mr->clr_reg; - reg_d = mr->clr_reg; - } - } - - fn += (mr->reg_width >> 3) - 1; - return _INTC_MK(fn, mode, - intc_get_reg(d, reg_e), - intc_get_reg(d, reg_d), - 1, - (mr->reg_width - 1) - j); - } - } - - if (do_grps) - return intc_mask_data(desc, d, intc_grp_id(desc, enum_id), 0); - - return 0; -} - -static unsigned int __init intc_prio_data(struct intc_desc *desc, - struct intc_desc_int *d, - intc_enum enum_id, int do_grps) -{ - struct intc_prio_reg *pr = desc->prio_regs; - unsigned int i, j, fn, mode, bit; - unsigned long reg_e, reg_d; - - for (i = 0; pr && enum_id && i < desc->nr_prio_regs; i++) { - pr = desc->prio_regs + i; - - for (j = 0; j < ARRAY_SIZE(pr->enum_ids); j++) { - if (pr->enum_ids[j] != enum_id) - continue; - - if (pr->set_reg && pr->clr_reg) { - fn = REG_FN_WRITE_BASE; - mode = MODE_PCLR_REG; - reg_e = pr->set_reg; - reg_d = pr->clr_reg; - } else { - fn = REG_FN_MODIFY_BASE; - mode = MODE_PRIO_REG; - if (!pr->set_reg) - BUG(); - reg_e = pr->set_reg; - reg_d = pr->set_reg; - } - - fn += (pr->reg_width >> 3) - 1; - - BUG_ON((j + 1) * pr->field_width > pr->reg_width); - - bit = pr->reg_width - ((j + 1) * pr->field_width); - - return _INTC_MK(fn, mode, - intc_get_reg(d, reg_e), - intc_get_reg(d, reg_d), - pr->field_width, bit); - } - } - - if (do_grps) - return intc_prio_data(desc, d, intc_grp_id(desc, enum_id), 0); - - return 0; -} - -#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A) -static unsigned int __init intc_ack_data(struct intc_desc *desc, - struct intc_desc_int *d, - intc_enum enum_id) -{ - struct intc_mask_reg *mr = desc->ack_regs; - unsigned int i, j, fn, mode; - unsigned long reg_e, reg_d; - - for (i = 0; mr && enum_id && i < desc->nr_ack_regs; i++) { - mr = desc->ack_regs + i; - - for (j = 0; j < ARRAY_SIZE(mr->enum_ids); j++) { - if (mr->enum_ids[j] != enum_id) - continue; - - fn = REG_FN_MODIFY_BASE; - mode = MODE_ENABLE_REG; - reg_e = mr->set_reg; - reg_d = mr->set_reg; - - fn += (mr->reg_width >> 3) - 1; - return _INTC_MK(fn, mode, - intc_get_reg(d, reg_e), - intc_get_reg(d, reg_d), - 1, - (mr->reg_width - 1) - j); - } - } - - return 0; -} -#endif - -static unsigned int __init intc_sense_data(struct intc_desc *desc, - struct intc_desc_int *d, - intc_enum enum_id) -{ - struct intc_sense_reg *sr = desc->sense_regs; - unsigned int i, j, fn, bit; - - for (i = 0; sr && enum_id && i < desc->nr_sense_regs; i++) { - sr = desc->sense_regs + i; - - for (j = 0; j < ARRAY_SIZE(sr->enum_ids); j++) { - if (sr->enum_ids[j] != enum_id) - continue; - - fn = REG_FN_MODIFY_BASE; - fn += (sr->reg_width >> 3) - 1; - - BUG_ON((j + 1) * sr->field_width > sr->reg_width); - - bit = sr->reg_width - ((j + 1) * sr->field_width); - - return _INTC_MK(fn, 0, intc_get_reg(d, sr->reg), - 0, sr->field_width, bit); - } - } - - return 0; -} - -static void __init intc_register_irq(struct intc_desc *desc, - struct intc_desc_int *d, - intc_enum enum_id, - unsigned int irq) -{ - struct intc_handle_int *hp; - unsigned int data[2], primary; - - /* Prefer single interrupt source bitmap over other combinations: - * 1. bitmap, single interrupt source - * 2. priority, single interrupt source - * 3. bitmap, multiple interrupt sources (groups) - * 4. priority, multiple interrupt sources (groups) - */ - - data[0] = intc_mask_data(desc, d, enum_id, 0); - data[1] = intc_prio_data(desc, d, enum_id, 0); - - primary = 0; - if (!data[0] && data[1]) - primary = 1; - - data[0] = data[0] ? data[0] : intc_mask_data(desc, d, enum_id, 1); - data[1] = data[1] ? data[1] : intc_prio_data(desc, d, enum_id, 1); - - if (!data[primary]) - primary ^= 1; - - BUG_ON(!data[primary]); /* must have primary masking method */ - - disable_irq_nosync(irq); - set_irq_chip_and_handler_name(irq, &d->chip, - handle_level_irq, "level"); - set_irq_chip_data(irq, (void *)data[primary]); - - /* set priority level - * - this needs to be at least 2 for 5-bit priorities on 7780 - */ - intc_prio_level[irq] = 2; - - /* enable secondary masking method if present */ - if (data[!primary]) - _intc_enable(irq, data[!primary]); - - /* add irq to d->prio list if priority is available */ - if (data[1]) { - hp = d->prio + d->nr_prio; - hp->irq = irq; - hp->handle = data[1]; - - if (primary) { - /* - * only secondary priority should access registers, so - * set _INTC_FN(h) = REG_FN_ERR for intc_set_priority() - */ - - hp->handle &= ~_INTC_MK(0x0f, 0, 0, 0, 0, 0); - hp->handle |= _INTC_MK(REG_FN_ERR, 0, 0, 0, 0, 0); - } - d->nr_prio++; - } - - /* add irq to d->sense list if sense is available */ - data[0] = intc_sense_data(desc, d, enum_id); - if (data[0]) { - (d->sense + d->nr_sense)->irq = irq; - (d->sense + d->nr_sense)->handle = data[0]; - d->nr_sense++; - } - - /* irq should be disabled by default */ - d->chip.mask(irq); - -#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A) - if (desc->ack_regs) - ack_handle[irq] = intc_ack_data(desc, d, enum_id); -#endif -} - -static unsigned int __init save_reg(struct intc_desc_int *d, - unsigned int cnt, - unsigned long value, - unsigned int smp) -{ - if (value) { - d->reg[cnt] = value; -#ifdef CONFIG_SMP - d->smp[cnt] = smp; -#endif - return 1; - } - - return 0; -} - - -void __init register_intc_controller(struct intc_desc *desc) -{ - unsigned int i, k, smp; - struct intc_desc_int *d; - - d = alloc_bootmem(sizeof(*d)); - - d->nr_reg = desc->mask_regs ? desc->nr_mask_regs * 2 : 0; - d->nr_reg += desc->prio_regs ? desc->nr_prio_regs * 2 : 0; - d->nr_reg += desc->sense_regs ? desc->nr_sense_regs : 0; - -#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A) - d->nr_reg += desc->ack_regs ? desc->nr_ack_regs : 0; -#endif - d->reg = alloc_bootmem(d->nr_reg * sizeof(*d->reg)); -#ifdef CONFIG_SMP - d->smp = alloc_bootmem(d->nr_reg * sizeof(*d->smp)); -#endif - k = 0; - - if (desc->mask_regs) { - for (i = 0; i < desc->nr_mask_regs; i++) { - smp = IS_SMP(desc->mask_regs[i]); - k += save_reg(d, k, desc->mask_regs[i].set_reg, smp); - k += save_reg(d, k, desc->mask_regs[i].clr_reg, smp); - } - } - - if (desc->prio_regs) { - d->prio = alloc_bootmem(desc->nr_vectors * sizeof(*d->prio)); - - for (i = 0; i < desc->nr_prio_regs; i++) { - smp = IS_SMP(desc->prio_regs[i]); - k += save_reg(d, k, desc->prio_regs[i].set_reg, smp); - k += save_reg(d, k, desc->prio_regs[i].clr_reg, smp); - } - } - - if (desc->sense_regs) { - d->sense = alloc_bootmem(desc->nr_vectors * sizeof(*d->sense)); - - for (i = 0; i < desc->nr_sense_regs; i++) { - k += save_reg(d, k, desc->sense_regs[i].reg, 0); - } - } - - d->chip.name = desc->name; - d->chip.mask = intc_disable; - d->chip.unmask = intc_enable; - d->chip.mask_ack = intc_disable; - d->chip.set_type = intc_set_sense; - -#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A) - if (desc->ack_regs) { - for (i = 0; i < desc->nr_ack_regs; i++) - k += save_reg(d, k, desc->ack_regs[i].set_reg, 0); - - d->chip.mask_ack = intc_mask_ack; - } -#endif - - BUG_ON(k > 256); /* _INTC_ADDR_E() and _INTC_ADDR_D() are 8 bits */ - - for (i = 0; i < desc->nr_vectors; i++) { - struct intc_vect *vect = desc->vectors + i; - - intc_register_irq(desc, d, vect->enum_id, evt2irq(vect->vect)); - } -} diff --git a/drivers/sh/Makefile b/drivers/sh/Makefile index a96f4a8cfeb8..6a025cefe6dc 100644 --- a/drivers/sh/Makefile +++ b/drivers/sh/Makefile @@ -1,6 +1,6 @@ # # Makefile for the SuperH specific drivers. # - obj-$(CONFIG_SUPERHYWAY) += superhyway/ obj-$(CONFIG_MAPLE) += maple/ +obj-y += intc.o diff --git a/drivers/sh/intc.c b/drivers/sh/intc.c new file mode 100644 index 000000000000..58d24c5a76ce --- /dev/null +++ b/drivers/sh/intc.c @@ -0,0 +1,713 @@ +/* + * Shared interrupt handling code for IPR and INTC2 types of IRQs. + * + * Copyright (C) 2007, 2008 Magnus Damm + * + * Based on intc2.c and ipr.c + * + * Copyright (C) 1999 Niibe Yutaka & Takeshi Yaegashi + * Copyright (C) 2000 Kazumoto Kojima + * Copyright (C) 2001 David J. Mckay (david.mckay@st.com) + * Copyright (C) 2003 Takashi Kusuda + * Copyright (C) 2005, 2006 Paul Mundt + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include +#include +#include +#include +#include +#include +#include + +#define _INTC_MK(fn, mode, addr_e, addr_d, width, shift) \ + ((shift) | ((width) << 5) | ((fn) << 9) | ((mode) << 13) | \ + ((addr_e) << 16) | ((addr_d << 24))) + +#define _INTC_SHIFT(h) (h & 0x1f) +#define _INTC_WIDTH(h) ((h >> 5) & 0xf) +#define _INTC_FN(h) ((h >> 9) & 0xf) +#define _INTC_MODE(h) ((h >> 13) & 0x7) +#define _INTC_ADDR_E(h) ((h >> 16) & 0xff) +#define _INTC_ADDR_D(h) ((h >> 24) & 0xff) + +struct intc_handle_int { + unsigned int irq; + unsigned long handle; +}; + +struct intc_desc_int { + unsigned long *reg; +#ifdef CONFIG_SMP + unsigned long *smp; +#endif + unsigned int nr_reg; + struct intc_handle_int *prio; + unsigned int nr_prio; + struct intc_handle_int *sense; + unsigned int nr_sense; + struct irq_chip chip; +}; + +#ifdef CONFIG_SMP +#define IS_SMP(x) x.smp +#define INTC_REG(d, x, c) (d->reg[(x)] + ((d->smp[(x)] & 0xff) * c)) +#define SMP_NR(d, x) ((d->smp[(x)] >> 8) ? (d->smp[(x)] >> 8) : 1) +#else +#define IS_SMP(x) 0 +#define INTC_REG(d, x, c) (d->reg[(x)]) +#define SMP_NR(d, x) 1 +#endif + +static unsigned int intc_prio_level[NR_IRQS]; /* for now */ +#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A) +static unsigned long ack_handle[NR_IRQS]; +#endif + +static inline struct intc_desc_int *get_intc_desc(unsigned int irq) +{ + struct irq_chip *chip = get_irq_chip(irq); + return (void *)((char *)chip - offsetof(struct intc_desc_int, chip)); +} + +static inline unsigned int set_field(unsigned int value, + unsigned int field_value, + unsigned int handle) +{ + unsigned int width = _INTC_WIDTH(handle); + unsigned int shift = _INTC_SHIFT(handle); + + value &= ~(((1 << width) - 1) << shift); + value |= field_value << shift; + return value; +} + +static void write_8(unsigned long addr, unsigned long h, unsigned long data) +{ + __raw_writeb(set_field(0, data, h), addr); +} + +static void write_16(unsigned long addr, unsigned long h, unsigned long data) +{ + __raw_writew(set_field(0, data, h), addr); +} + +static void write_32(unsigned long addr, unsigned long h, unsigned long data) +{ + __raw_writel(set_field(0, data, h), addr); +} + +static void modify_8(unsigned long addr, unsigned long h, unsigned long data) +{ + unsigned long flags; + local_irq_save(flags); + __raw_writeb(set_field(__raw_readb(addr), data, h), addr); + local_irq_restore(flags); +} + +static void modify_16(unsigned long addr, unsigned long h, unsigned long data) +{ + unsigned long flags; + local_irq_save(flags); + __raw_writew(set_field(__raw_readw(addr), data, h), addr); + local_irq_restore(flags); +} + +static void modify_32(unsigned long addr, unsigned long h, unsigned long data) +{ + unsigned long flags; + local_irq_save(flags); + __raw_writel(set_field(__raw_readl(addr), data, h), addr); + local_irq_restore(flags); +} + +enum { REG_FN_ERR = 0, REG_FN_WRITE_BASE = 1, REG_FN_MODIFY_BASE = 5 }; + +static void (*intc_reg_fns[])(unsigned long addr, + unsigned long h, + unsigned long data) = { + [REG_FN_WRITE_BASE + 0] = write_8, + [REG_FN_WRITE_BASE + 1] = write_16, + [REG_FN_WRITE_BASE + 3] = write_32, + [REG_FN_MODIFY_BASE + 0] = modify_8, + [REG_FN_MODIFY_BASE + 1] = modify_16, + [REG_FN_MODIFY_BASE + 3] = modify_32, +}; + +enum { MODE_ENABLE_REG = 0, /* Bit(s) set -> interrupt enabled */ + MODE_MASK_REG, /* Bit(s) set -> interrupt disabled */ + MODE_DUAL_REG, /* Two registers, set bit to enable / disable */ + MODE_PRIO_REG, /* Priority value written to enable interrupt */ + MODE_PCLR_REG, /* Above plus all bits set to disable interrupt */ +}; + +static void intc_mode_field(unsigned long addr, + unsigned long handle, + void (*fn)(unsigned long, + unsigned long, + unsigned long), + unsigned int irq) +{ + fn(addr, handle, ((1 << _INTC_WIDTH(handle)) - 1)); +} + +static void intc_mode_zero(unsigned long addr, + unsigned long handle, + void (*fn)(unsigned long, + unsigned long, + unsigned long), + unsigned int irq) +{ + fn(addr, handle, 0); +} + +static void intc_mode_prio(unsigned long addr, + unsigned long handle, + void (*fn)(unsigned long, + unsigned long, + unsigned long), + unsigned int irq) +{ + fn(addr, handle, intc_prio_level[irq]); +} + +static void (*intc_enable_fns[])(unsigned long addr, + unsigned long handle, + void (*fn)(unsigned long, + unsigned long, + unsigned long), + unsigned int irq) = { + [MODE_ENABLE_REG] = intc_mode_field, + [MODE_MASK_REG] = intc_mode_zero, + [MODE_DUAL_REG] = intc_mode_field, + [MODE_PRIO_REG] = intc_mode_prio, + [MODE_PCLR_REG] = intc_mode_prio, +}; + +static void (*intc_disable_fns[])(unsigned long addr, + unsigned long handle, + void (*fn)(unsigned long, + unsigned long, + unsigned long), + unsigned int irq) = { + [MODE_ENABLE_REG] = intc_mode_zero, + [MODE_MASK_REG] = intc_mode_field, + [MODE_DUAL_REG] = intc_mode_field, + [MODE_PRIO_REG] = intc_mode_zero, + [MODE_PCLR_REG] = intc_mode_field, +}; + +static inline void _intc_enable(unsigned int irq, unsigned long handle) +{ + struct intc_desc_int *d = get_intc_desc(irq); + unsigned long addr; + unsigned int cpu; + + for (cpu = 0; cpu < SMP_NR(d, _INTC_ADDR_E(handle)); cpu++) { + addr = INTC_REG(d, _INTC_ADDR_E(handle), cpu); + intc_enable_fns[_INTC_MODE(handle)](addr, handle, intc_reg_fns\ + [_INTC_FN(handle)], irq); + } +} + +static void intc_enable(unsigned int irq) +{ + _intc_enable(irq, (unsigned long)get_irq_chip_data(irq)); +} + +static void intc_disable(unsigned int irq) +{ + struct intc_desc_int *d = get_intc_desc(irq); + unsigned long handle = (unsigned long) get_irq_chip_data(irq); + unsigned long addr; + unsigned int cpu; + + for (cpu = 0; cpu < SMP_NR(d, _INTC_ADDR_D(handle)); cpu++) { + addr = INTC_REG(d, _INTC_ADDR_D(handle), cpu); + intc_disable_fns[_INTC_MODE(handle)](addr, handle,intc_reg_fns\ + [_INTC_FN(handle)], irq); + } +} + +#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A) +static void intc_mask_ack(unsigned int irq) +{ + struct intc_desc_int *d = get_intc_desc(irq); + unsigned long handle = ack_handle[irq]; + unsigned long addr; + + intc_disable(irq); + + /* read register and write zero only to the assocaited bit */ + + if (handle) { + addr = INTC_REG(d, _INTC_ADDR_D(handle), 0); + switch (_INTC_FN(handle)) { + case REG_FN_MODIFY_BASE + 0: /* 8bit */ + __raw_readb(addr); + __raw_writeb(0xff ^ set_field(0, 1, handle), addr); + break; + case REG_FN_MODIFY_BASE + 1: /* 16bit */ + __raw_readw(addr); + __raw_writew(0xffff ^ set_field(0, 1, handle), addr); + break; + case REG_FN_MODIFY_BASE + 3: /* 32bit */ + __raw_readl(addr); + __raw_writel(0xffffffff ^ set_field(0, 1, handle), addr); + break; + default: + BUG(); + break; + } + } +} +#endif + +static struct intc_handle_int *intc_find_irq(struct intc_handle_int *hp, + unsigned int nr_hp, + unsigned int irq) +{ + int i; + + /* this doesn't scale well, but... + * + * this function should only be used for cerain uncommon + * operations such as intc_set_priority() and intc_set_sense() + * and in those rare cases performance doesn't matter that much. + * keeping the memory footprint low is more important. + * + * one rather simple way to speed this up and still keep the + * memory footprint down is to make sure the array is sorted + * and then perform a bisect to lookup the irq. + */ + + for (i = 0; i < nr_hp; i++) { + if ((hp + i)->irq != irq) + continue; + + return hp + i; + } + + return NULL; +} + +int intc_set_priority(unsigned int irq, unsigned int prio) +{ + struct intc_desc_int *d = get_intc_desc(irq); + struct intc_handle_int *ihp; + + if (!intc_prio_level[irq] || prio <= 1) + return -EINVAL; + + ihp = intc_find_irq(d->prio, d->nr_prio, irq); + if (ihp) { + if (prio >= (1 << _INTC_WIDTH(ihp->handle))) + return -EINVAL; + + intc_prio_level[irq] = prio; + + /* + * only set secondary masking method directly + * primary masking method is using intc_prio_level[irq] + * priority level will be set during next enable() + */ + + if (_INTC_FN(ihp->handle) != REG_FN_ERR) + _intc_enable(irq, ihp->handle); + } + return 0; +} + +#define VALID(x) (x | 0x80) + +static unsigned char intc_irq_sense_table[IRQ_TYPE_SENSE_MASK + 1] = { + [IRQ_TYPE_EDGE_FALLING] = VALID(0), + [IRQ_TYPE_EDGE_RISING] = VALID(1), + [IRQ_TYPE_LEVEL_LOW] = VALID(2), + /* SH7706, SH7707 and SH7709 do not support high level triggered */ +#if !defined(CONFIG_CPU_SUBTYPE_SH7706) && \ + !defined(CONFIG_CPU_SUBTYPE_SH7707) && \ + !defined(CONFIG_CPU_SUBTYPE_SH7709) + [IRQ_TYPE_LEVEL_HIGH] = VALID(3), +#endif +}; + +static int intc_set_sense(unsigned int irq, unsigned int type) +{ + struct intc_desc_int *d = get_intc_desc(irq); + unsigned char value = intc_irq_sense_table[type & IRQ_TYPE_SENSE_MASK]; + struct intc_handle_int *ihp; + unsigned long addr; + + if (!value) + return -EINVAL; + + ihp = intc_find_irq(d->sense, d->nr_sense, irq); + if (ihp) { + addr = INTC_REG(d, _INTC_ADDR_E(ihp->handle), 0); + intc_reg_fns[_INTC_FN(ihp->handle)](addr, ihp->handle, value); + } + return 0; +} + +static unsigned int __init intc_get_reg(struct intc_desc_int *d, + unsigned long address) +{ + unsigned int k; + + for (k = 0; k < d->nr_reg; k++) { + if (d->reg[k] == address) + return k; + } + + BUG(); + return 0; +} + +static intc_enum __init intc_grp_id(struct intc_desc *desc, + intc_enum enum_id) +{ + struct intc_group *g = desc->groups; + unsigned int i, j; + + for (i = 0; g && enum_id && i < desc->nr_groups; i++) { + g = desc->groups + i; + + for (j = 0; g->enum_ids[j]; j++) { + if (g->enum_ids[j] != enum_id) + continue; + + return g->enum_id; + } + } + + return 0; +} + +static unsigned int __init intc_mask_data(struct intc_desc *desc, + struct intc_desc_int *d, + intc_enum enum_id, int do_grps) +{ + struct intc_mask_reg *mr = desc->mask_regs; + unsigned int i, j, fn, mode; + unsigned long reg_e, reg_d; + + for (i = 0; mr && enum_id && i < desc->nr_mask_regs; i++) { + mr = desc->mask_regs + i; + + for (j = 0; j < ARRAY_SIZE(mr->enum_ids); j++) { + if (mr->enum_ids[j] != enum_id) + continue; + + if (mr->set_reg && mr->clr_reg) { + fn = REG_FN_WRITE_BASE; + mode = MODE_DUAL_REG; + reg_e = mr->clr_reg; + reg_d = mr->set_reg; + } else { + fn = REG_FN_MODIFY_BASE; + if (mr->set_reg) { + mode = MODE_ENABLE_REG; + reg_e = mr->set_reg; + reg_d = mr->set_reg; + } else { + mode = MODE_MASK_REG; + reg_e = mr->clr_reg; + reg_d = mr->clr_reg; + } + } + + fn += (mr->reg_width >> 3) - 1; + return _INTC_MK(fn, mode, + intc_get_reg(d, reg_e), + intc_get_reg(d, reg_d), + 1, + (mr->reg_width - 1) - j); + } + } + + if (do_grps) + return intc_mask_data(desc, d, intc_grp_id(desc, enum_id), 0); + + return 0; +} + +static unsigned int __init intc_prio_data(struct intc_desc *desc, + struct intc_desc_int *d, + intc_enum enum_id, int do_grps) +{ + struct intc_prio_reg *pr = desc->prio_regs; + unsigned int i, j, fn, mode, bit; + unsigned long reg_e, reg_d; + + for (i = 0; pr && enum_id && i < desc->nr_prio_regs; i++) { + pr = desc->prio_regs + i; + + for (j = 0; j < ARRAY_SIZE(pr->enum_ids); j++) { + if (pr->enum_ids[j] != enum_id) + continue; + + if (pr->set_reg && pr->clr_reg) { + fn = REG_FN_WRITE_BASE; + mode = MODE_PCLR_REG; + reg_e = pr->set_reg; + reg_d = pr->clr_reg; + } else { + fn = REG_FN_MODIFY_BASE; + mode = MODE_PRIO_REG; + if (!pr->set_reg) + BUG(); + reg_e = pr->set_reg; + reg_d = pr->set_reg; + } + + fn += (pr->reg_width >> 3) - 1; + + BUG_ON((j + 1) * pr->field_width > pr->reg_width); + + bit = pr->reg_width - ((j + 1) * pr->field_width); + + return _INTC_MK(fn, mode, + intc_get_reg(d, reg_e), + intc_get_reg(d, reg_d), + pr->field_width, bit); + } + } + + if (do_grps) + return intc_prio_data(desc, d, intc_grp_id(desc, enum_id), 0); + + return 0; +} + +#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A) +static unsigned int __init intc_ack_data(struct intc_desc *desc, + struct intc_desc_int *d, + intc_enum enum_id) +{ + struct intc_mask_reg *mr = desc->ack_regs; + unsigned int i, j, fn, mode; + unsigned long reg_e, reg_d; + + for (i = 0; mr && enum_id && i < desc->nr_ack_regs; i++) { + mr = desc->ack_regs + i; + + for (j = 0; j < ARRAY_SIZE(mr->enum_ids); j++) { + if (mr->enum_ids[j] != enum_id) + continue; + + fn = REG_FN_MODIFY_BASE; + mode = MODE_ENABLE_REG; + reg_e = mr->set_reg; + reg_d = mr->set_reg; + + fn += (mr->reg_width >> 3) - 1; + return _INTC_MK(fn, mode, + intc_get_reg(d, reg_e), + intc_get_reg(d, reg_d), + 1, + (mr->reg_width - 1) - j); + } + } + + return 0; +} +#endif + +static unsigned int __init intc_sense_data(struct intc_desc *desc, + struct intc_desc_int *d, + intc_enum enum_id) +{ + struct intc_sense_reg *sr = desc->sense_regs; + unsigned int i, j, fn, bit; + + for (i = 0; sr && enum_id && i < desc->nr_sense_regs; i++) { + sr = desc->sense_regs + i; + + for (j = 0; j < ARRAY_SIZE(sr->enum_ids); j++) { + if (sr->enum_ids[j] != enum_id) + continue; + + fn = REG_FN_MODIFY_BASE; + fn += (sr->reg_width >> 3) - 1; + + BUG_ON((j + 1) * sr->field_width > sr->reg_width); + + bit = sr->reg_width - ((j + 1) * sr->field_width); + + return _INTC_MK(fn, 0, intc_get_reg(d, sr->reg), + 0, sr->field_width, bit); + } + } + + return 0; +} + +static void __init intc_register_irq(struct intc_desc *desc, + struct intc_desc_int *d, + intc_enum enum_id, + unsigned int irq) +{ + struct intc_handle_int *hp; + unsigned int data[2], primary; + + /* Prefer single interrupt source bitmap over other combinations: + * 1. bitmap, single interrupt source + * 2. priority, single interrupt source + * 3. bitmap, multiple interrupt sources (groups) + * 4. priority, multiple interrupt sources (groups) + */ + + data[0] = intc_mask_data(desc, d, enum_id, 0); + data[1] = intc_prio_data(desc, d, enum_id, 0); + + primary = 0; + if (!data[0] && data[1]) + primary = 1; + + data[0] = data[0] ? data[0] : intc_mask_data(desc, d, enum_id, 1); + data[1] = data[1] ? data[1] : intc_prio_data(desc, d, enum_id, 1); + + if (!data[primary]) + primary ^= 1; + + BUG_ON(!data[primary]); /* must have primary masking method */ + + disable_irq_nosync(irq); + set_irq_chip_and_handler_name(irq, &d->chip, + handle_level_irq, "level"); + set_irq_chip_data(irq, (void *)data[primary]); + + /* set priority level + * - this needs to be at least 2 for 5-bit priorities on 7780 + */ + intc_prio_level[irq] = 2; + + /* enable secondary masking method if present */ + if (data[!primary]) + _intc_enable(irq, data[!primary]); + + /* add irq to d->prio list if priority is available */ + if (data[1]) { + hp = d->prio + d->nr_prio; + hp->irq = irq; + hp->handle = data[1]; + + if (primary) { + /* + * only secondary priority should access registers, so + * set _INTC_FN(h) = REG_FN_ERR for intc_set_priority() + */ + + hp->handle &= ~_INTC_MK(0x0f, 0, 0, 0, 0, 0); + hp->handle |= _INTC_MK(REG_FN_ERR, 0, 0, 0, 0, 0); + } + d->nr_prio++; + } + + /* add irq to d->sense list if sense is available */ + data[0] = intc_sense_data(desc, d, enum_id); + if (data[0]) { + (d->sense + d->nr_sense)->irq = irq; + (d->sense + d->nr_sense)->handle = data[0]; + d->nr_sense++; + } + + /* irq should be disabled by default */ + d->chip.mask(irq); + +#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A) + if (desc->ack_regs) + ack_handle[irq] = intc_ack_data(desc, d, enum_id); +#endif +} + +static unsigned int __init save_reg(struct intc_desc_int *d, + unsigned int cnt, + unsigned long value, + unsigned int smp) +{ + if (value) { + d->reg[cnt] = value; +#ifdef CONFIG_SMP + d->smp[cnt] = smp; +#endif + return 1; + } + + return 0; +} + + +void __init register_intc_controller(struct intc_desc *desc) +{ + unsigned int i, k, smp; + struct intc_desc_int *d; + + d = alloc_bootmem(sizeof(*d)); + + d->nr_reg = desc->mask_regs ? desc->nr_mask_regs * 2 : 0; + d->nr_reg += desc->prio_regs ? desc->nr_prio_regs * 2 : 0; + d->nr_reg += desc->sense_regs ? desc->nr_sense_regs : 0; + +#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A) + d->nr_reg += desc->ack_regs ? desc->nr_ack_regs : 0; +#endif + d->reg = alloc_bootmem(d->nr_reg * sizeof(*d->reg)); +#ifdef CONFIG_SMP + d->smp = alloc_bootmem(d->nr_reg * sizeof(*d->smp)); +#endif + k = 0; + + if (desc->mask_regs) { + for (i = 0; i < desc->nr_mask_regs; i++) { + smp = IS_SMP(desc->mask_regs[i]); + k += save_reg(d, k, desc->mask_regs[i].set_reg, smp); + k += save_reg(d, k, desc->mask_regs[i].clr_reg, smp); + } + } + + if (desc->prio_regs) { + d->prio = alloc_bootmem(desc->nr_vectors * sizeof(*d->prio)); + + for (i = 0; i < desc->nr_prio_regs; i++) { + smp = IS_SMP(desc->prio_regs[i]); + k += save_reg(d, k, desc->prio_regs[i].set_reg, smp); + k += save_reg(d, k, desc->prio_regs[i].clr_reg, smp); + } + } + + if (desc->sense_regs) { + d->sense = alloc_bootmem(desc->nr_vectors * sizeof(*d->sense)); + + for (i = 0; i < desc->nr_sense_regs; i++) { + k += save_reg(d, k, desc->sense_regs[i].reg, 0); + } + } + + d->chip.name = desc->name; + d->chip.mask = intc_disable; + d->chip.unmask = intc_enable; + d->chip.mask_ack = intc_disable; + d->chip.set_type = intc_set_sense; + +#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A) + if (desc->ack_regs) { + for (i = 0; i < desc->nr_ack_regs; i++) + k += save_reg(d, k, desc->ack_regs[i].set_reg, 0); + + d->chip.mask_ack = intc_mask_ack; + } +#endif + + BUG_ON(k > 256); /* _INTC_ADDR_E() and _INTC_ADDR_D() are 8 bits */ + + for (i = 0; i < desc->nr_vectors; i++) { + struct intc_vect *vect = desc->vectors + i; + + intc_register_irq(desc, d, vect->enum_id, evt2irq(vect->vect)); + } +} diff --git a/include/linux/sh_intc.h b/include/linux/sh_intc.h new file mode 100644 index 000000000000..68e212ff9dde --- /dev/null +++ b/include/linux/sh_intc.h @@ -0,0 +1,91 @@ +#ifndef __SH_INTC_H +#define __SH_INTC_H + +typedef unsigned char intc_enum; + +struct intc_vect { + intc_enum enum_id; + unsigned short vect; +}; + +#define INTC_VECT(enum_id, vect) { enum_id, vect } +#define INTC_IRQ(enum_id, irq) INTC_VECT(enum_id, irq2evt(irq)) + +struct intc_group { + intc_enum enum_id; + intc_enum enum_ids[32]; +}; + +#define INTC_GROUP(enum_id, ids...) { enum_id, { ids } } + +struct intc_mask_reg { + unsigned long set_reg, clr_reg, reg_width; + intc_enum enum_ids[32]; +#ifdef CONFIG_SMP + unsigned long smp; +#endif +}; + +struct intc_prio_reg { + unsigned long set_reg, clr_reg, reg_width, field_width; + intc_enum enum_ids[16]; +#ifdef CONFIG_SMP + unsigned long smp; +#endif +}; + +struct intc_sense_reg { + unsigned long reg, reg_width, field_width; + intc_enum enum_ids[16]; +}; + +#ifdef CONFIG_SMP +#define INTC_SMP(stride, nr) .smp = (stride) | ((nr) << 8) +#else +#define INTC_SMP(stride, nr) +#endif + +struct intc_desc { + struct intc_vect *vectors; + unsigned int nr_vectors; + struct intc_group *groups; + unsigned int nr_groups; + struct intc_mask_reg *mask_regs; + unsigned int nr_mask_regs; + struct intc_prio_reg *prio_regs; + unsigned int nr_prio_regs; + struct intc_sense_reg *sense_regs; + unsigned int nr_sense_regs; + char *name; +#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A) + struct intc_mask_reg *ack_regs; + unsigned int nr_ack_regs; +#endif +}; + +#define _INTC_ARRAY(a) a, sizeof(a)/sizeof(*a) +#define DECLARE_INTC_DESC(symbol, chipname, vectors, groups, \ + mask_regs, prio_regs, sense_regs) \ +struct intc_desc symbol __initdata = { \ + _INTC_ARRAY(vectors), _INTC_ARRAY(groups), \ + _INTC_ARRAY(mask_regs), _INTC_ARRAY(prio_regs), \ + _INTC_ARRAY(sense_regs), \ + chipname, \ +} + +#if defined(CONFIG_CPU_SH3) || defined(CONFIG_CPU_SH4A) +#define DECLARE_INTC_DESC_ACK(symbol, chipname, vectors, groups, \ + mask_regs, prio_regs, sense_regs, ack_regs) \ +struct intc_desc symbol __initdata = { \ + _INTC_ARRAY(vectors), _INTC_ARRAY(groups), \ + _INTC_ARRAY(mask_regs), _INTC_ARRAY(prio_regs), \ + _INTC_ARRAY(sense_regs), \ + chipname, \ + _INTC_ARRAY(ack_regs), \ +} +#endif + +void __init register_intc_controller(struct intc_desc *desc); +int intc_set_priority(unsigned int irq, unsigned int prio); + +#endif /* __SH_INTC_H */ -- cgit v1.2.3 From 225c9a8d1da274bf23efec43ec28b1c9e45e12f8 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Wed, 1 Oct 2008 16:24:32 +0900 Subject: video: sh_mobile_lcdcfb: Support HAVE_CLK=n configurations. This provides a workaround for users of sh_mobile_lcdcfb that don't define HAVE_CLK and have otherwise sane clock initialization. At the same time, move the sh_mobile_lcdc.h header to include/video/. Signed-off-by: Paul Mundt --- arch/sh/boards/board-ap325rxa.c | 2 +- arch/sh/boards/mach-migor/lcd_qvga.c | 2 +- arch/sh/boards/mach-migor/setup.c | 2 +- arch/sh/include/asm/migor.h | 2 +- arch/sh/include/asm/sh_mobile_lcdc.h | 72 ------------------------------------ drivers/video/sh_mobile_lcdcfb.c | 8 +++- include/video/sh_mobile_lcdc.h | 72 ++++++++++++++++++++++++++++++++++++ 7 files changed, 83 insertions(+), 77 deletions(-) delete mode 100644 arch/sh/include/asm/sh_mobile_lcdc.h create mode 100644 include/video/sh_mobile_lcdc.h (limited to 'include') diff --git a/arch/sh/boards/board-ap325rxa.c b/arch/sh/boards/board-ap325rxa.c index 00e632fc0688..7ae8dcddfeb4 100644 --- a/arch/sh/boards/board-ap325rxa.c +++ b/arch/sh/boards/board-ap325rxa.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include