/* znet.c: An Zenith Z-Note ethernet driver for linux. */ /* Written by Donald Becker. The author may be reached as becker@scyld.com. This driver is based on the Linux skeleton driver. The copyright of the skeleton driver is held by the United States Government, as represented by DIRNSA, and it is released under the GPL. Thanks to Mike Hollick for alpha testing and suggestions. References: The Crynwr packet driver. "82593 CSMA/CD Core LAN Controller" Intel datasheet, 1992 Intel Microcommunications Databook, Vol. 1, 1990. As usual with Intel, the documentation is incomplete and inaccurate. I had to read the Crynwr packet driver to figure out how to actually use the i82593, and guess at what register bits matched the loosely related i82586. Theory of Operation The i82593 used in the Zenith Z-Note series operates using two(!) slave DMA channels, one interrupt, and one 8-bit I/O port. While there several ways to configure '593 DMA system, I chose the one that seemed commensurate with the highest system performance in the face of moderate interrupt latency: Both DMA channels are configured as recirculating ring buffers, with one channel (#0) dedicated to Rx and the other channel (#1) to Tx and configuration. (Note that this is different than the Crynwr driver, where the Tx DMA channel is initialized before each operation. That approach simplifies operation and Tx error recovery, but requires additional I/O in normal operation and precludes transmit buffer chaining.) Both rings are set to 8192 bytes using {TX,RX}_RING_SIZE. This provides a reasonable ring size for Rx, while simplifying DMA buffer allocation -- DMA buffers must not cross a 128K boundary. (In truth the size selection was influenced by my lack of '593 documentation. I thus was constrained to use the Crynwr '593 initialization table, which sets the Rx ring size to 8K.) 
Despite my usual low opinion about Intel-designed parts, I must admit that the bulk data handling of the i82593 is a good design for an integrated system, like a laptop, where using two slave DMA channels doesn't pose a problem. I still take issue with using only a single I/O port. In the same controlled environment there are essentially no limitations on I/O space, and using multiple locations would eliminate the need for multiple operations when looking at status registers, setting the Rx ring boundary, or switching to promiscuous mode. I also question Zenith's selection of the '593: one of the advertised advantages of earlier Intel parts was that if you figured out the magic initialization incantation you could use the same part on many different network types. Zenith's use of the "FriendlyNet" (sic) connector rather than an on-board transceiver leads me to believe that they were planning to take advantage of this. But, uhmmm, the '593 omits all but ethernet functionality from the serial subsystem. */ /* 10/2002 o Resurected for Linux 2.5+ by Marc Zyngier : - Removed strange DMA snooping in znet_sent_packet, which lead to TX buffer corruption on my laptop. - Use init_etherdev stuff. - Use kmalloc-ed DMA buffers. - Use as few global variables as possible. - Use proper resources management. - Use wireless/i82593.h as much as possible (structure, constants) - Compiles as module or build-in. - Now survives unplugging/replugging cable. Some code was taken from wavelan_cs. Tested on a vintage Zenith Z-Note 433Lnp+. Probably broken on anything else. Testers (and detailed bug reports) are welcome :-). o TODO : - Properly handle multicast - Understand why some traffic patterns add a 1s latency... 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static char version[] __initdata = "znet.c:v1.02 9/23/94 becker@scyld.com\n"; #ifndef ZNET_DEBUG #define ZNET_DEBUG 1 #endif static unsigned int znet_debug = ZNET_DEBUG; module_param (znet_debug, int, 0); MODULE_PARM_DESC (znet_debug, "ZNet debug level"); MODULE_LICENSE("GPL"); /* The DMA modes we need aren't in . */ #define DMA_RX_MODE 0x14 /* Auto init, I/O to mem, ++, demand. */ #define DMA_TX_MODE 0x18 /* Auto init, Mem to I/O, ++, demand. */ #define dma_page_eq(ptr1, ptr2) ((long)(ptr1)>>17 == (long)(ptr2)>>17) #define RX_BUF_SIZE 8192 #define TX_BUF_SIZE 8192 #define DMA_BUF_SIZE (RX_BUF_SIZE + 16) /* 8k + 16 bytes for trailers */ #define TX_TIMEOUT 10 struct znet_private { int rx_dma, tx_dma; spinlock_t lock; short sia_base, sia_size, io_size; struct i82593_conf_block i593_init; /* The starting, current, and end pointers for the packet buffers. */ ushort *rx_start, *rx_cur, *rx_end; ushort *tx_start, *tx_cur, *tx_end; ushort tx_buf_len; /* Tx buffer length, in words. */ }; /* Only one can be built-in;-> */ static struct net_device *znet_dev; struct netidblk { char magic[8]; /* The magic number (string) "NETIDBLK" */ unsigned char netid[8]; /* The physical station address */ char nettype, globalopt; char vendor[8]; /* The machine vendor and product name. */ char product[8]; char irq1, irq2; /* Interrupts, only one is currently used. */ char dma1, dma2; short dma_mem_misc[8]; /* DMA buffer locations (unused in Linux). */ short iobase1, iosize1; short iobase2, iosize2; /* Second iobase unused. */ char driver_options; /* Misc. 
bits */ char pad; }; static int znet_open(struct net_device *dev); static netdev_tx_t znet_send_packet(struct sk_buff *skb, struct net_device *dev); static irqreturn_t znet_interrupt(int irq, void *dev_id); static void znet_rx(struct net_device *dev); static int znet_close(struct net_device *dev); static void hardware_init(struct net_device *dev); static void update_stop_hit(short ioaddr, unsigned short rx_stop_offset); static void znet_tx_timeout (struct net_device *dev); /* Request needed resources */ static int znet_request_resources (struct net_device *dev) { struct znet_private *znet = netdev_priv(dev); if (request_irq (dev->irq, znet_interrupt, 0, "ZNet", dev)) goto failed; if (request_dma (znet->rx_dma, "ZNet rx")) goto free_irq; if (request_dma (znet->tx_dma, "ZNet tx")) goto free_rx_dma; if (!request_region (znet->sia_base, znet->sia_size, "ZNet SIA")) goto free_tx_dma; if (!request_region (dev->base_addr, znet->io_size, "ZNet I/O")) goto free_sia; return 0; /* Happy ! */ free_sia: release_region (znet->sia_base, znet->sia_size); free_tx_dma: free_dma (znet->tx_dma); free_rx_dma: free_dma (znet->rx_dma); free_irq: free_irq (dev->irq, dev); failed: return -1; } static void znet_release_resources (struct net_device *dev) { struct znet_private *znet = netdev_priv(dev); release_region (znet->sia_base, znet->sia_size); release_region (dev->base_addr, znet->io_size); free_dma (znet->tx_dma); free_dma (znet->rx_dma); free_irq (dev->irq, dev); } /* Keep the magical SIA stuff in a single function... */ static void znet_transceiver_power (struct net_device *dev, int on) { struct znet_private *znet = netdev_priv(dev); unsigned char v; /* Turn on/off the 82501 SIA, using zenith-specific magic. */ /* Select LAN control register */ outb(0x10, znet->sia_base); if (on) v = inb(znet->sia_base + 1) | 0x84; else v = inb(znet->sia_base + 1) & ~0x84; outb(v, znet->sia_base+1); /* Turn on/off LAN power (bit 2). */ } /* Init the i82593, with current promisc/mcast configuration. 
Also used from hardware_init. */ static void znet_set_multicast_list (struct net_device *dev) { struct znet_private *znet = netdev_priv(dev); short ioaddr = dev->base_addr; struct i82593_conf_block *cfblk = &znet->i593_init; memset(cfblk, 0x00, sizeof(struct i82593_conf_block)); /* The configuration block. What an undocumented nightmare. The first set of values are those suggested (without explanation) for ethernet in the Intel 82586 databook. The rest appear to be completely undocumented, except for cryptic notes in the Crynwr packet driver. This driver uses the Crynwr values verbatim. */ /* maz : Rewritten to take advantage of the wanvelan includes. At least we have names, not just blind values */ /* Byte 0 */ cfblk->fifo_limit = 10; /* = 16 B rx and 80 B tx fifo thresholds */ cfblk->forgnesi = 0; /* 0=82C501, 1=AMD7992B compatibility */ cfblk->fifo_32 = 1; cfblk->d6mod = 0; /* Run in i82593 advanced mode */ cfblk->throttle_enb = 1; /* Byte 1 */ cfblk->throttle = 8; /* Continuous w/interrupts, 128-clock DMA. */ cfblk->cntrxint = 0; /* enable continuous mode receive interrupts */ cfblk->contin = 1; /* enable continuous mode */ /* Byte 2 */ cfblk->addr_len = ETH_ALEN; cfblk->acloc = 1; /* Disable source addr insertion by i82593 */ cfblk->preamb_len = 2; /* 8 bytes preamble */ cfblk->loopback = 0; /* Loopback off */ /* Byte 3 */ cfblk->lin_prio = 0; /* Default priorities & backoff methods. */ cfblk->tbofstop = 0; cfblk->exp_prio = 0; cfblk->bof_met = 0; /* Byte 4 */ cfblk->ifrm_spc = 6; /* 96 bit times interframe spacing */ /* Byte 5 */ cfblk->slottim_low = 0; /* 512 bit times slot time (low) */ /* Byte 6 */ cfblk->slottim_hi = 2; /* 512 bit times slot time (high) */ cfblk->max_retr = 15; /* 15 collisions retries */ /* Byte 7 */ cfblk->prmisc = ((dev->flags & IFF_PROMISC) ? 
1 : 0); /* Promiscuous mode */ cfblk->bc_dis = 0; /* Enable broadcast reception */ cfblk->crs_1 = 0; /* Don't transmit without carrier sense */ cfblk->nocrc_ins = 0; /* i82593 generates CRC */ cfblk->crc_1632 = 0; /* 32-bit Autodin-II CRC */ cfblk->crs_cdt = 0; /* CD not to be interpreted as CS */ /* Byte 8 */ cfblk->cs_filter = 0; /* CS is recognized immediately */ cfblk->crs_src = 0; /* External carrier sense */ cfblk->cd_filter = 0; /* CD is recognized immediately */ /* Byte 9 */ cfblk->min_fr_len = ETH_ZLEN >> 2; /* Minimum frame length */ /* Byte A */ cfblk->lng_typ = 1; /* Type/length checks OFF */ cfblk->lng_fld = 1; /* Disable 802.3 length field check */ cfblk->rxcrc_xf = 1; /* Don't transfer CRC to memory */ cfblk->artx = 1; /* Disable automatic retransmission */ cfblk->sarec = 1; /* Disable source addr trig of CD */ cfblk->tx_jabber = 0; /* Disable jabber jam sequence */ cfblk->hash_1 = 1; /* Use bits 0-5 in mc address hash */ cfblk->lbpkpol = 0; /* Loopback pin active high */ /* Byte B */ cfblk->fdx = 0; /* Disable full duplex operation */ /* Byte C */ cfblk->dummy_6 = 0x3f; /* all ones, Default multicast addresses & backoff. */ cfblk->mult_ia = 0; /* No multiple individual addresses */ cfblk->dis_bof = 0; /* Disable the backoff algorithm ?! */ /* Byte D */ cfblk->dummy_1 = 1; /* set to 1 */ cfblk->tx_ifs_retrig = 3; /* Hmm... Disabled */ cfblk->mc_all = (dev->mc_list || (dev->flags&IFF_ALLMULTI));/* multicast all mode */ cfblk->rcv_mon = 0; /* Monitor mode disabled */ cfblk->frag_acpt = 0; /* Do not accept fragments */ cfblk->tstrttrs = 0; /* No start transmission threshold */ /* Byte E */ cfblk->fretx = 1; /* FIFO automatic retransmission */ cfblk->runt_eop = 0; /* drop "runt" packets */ cfblk->hw_sw_pin = 0; /* ?? */ cfblk->big_endn = 0; /* Big Endian ? no... */ cfblk->syncrqs = 1; /* Synchronous DRQ deassertion... 
*/
	cfblk->sttlen = 1;	/* 6 byte status registers */
	cfblk->rx_eop = 0;	/* Signal EOP on packet reception */
	cfblk->tx_eop = 0;	/* Signal EOP on packet transmission */

	/* Byte F */
	cfblk->rbuf_size = RX_BUF_SIZE >> 12;	/* Set receive buffer size */
	cfblk->rcvstop = 1;	/* Enable Receive Stop Register */

	if (znet_debug > 2) {
		int i;
		unsigned char *c;

		/* Hex-dump the raw configuration block. */
		for (i = 0, c = (char *) cfblk; i < sizeof (*cfblk); i++)
			printk ("%02X ", c[i]);
		printk ("\n");
	}

	/* Queue the config block into the Tx DMA ring (word count first),
	   then kick the chip with a CONFIGURE command on the Tx channel. */
	*znet->tx_cur++ = sizeof(struct i82593_conf_block);
	memcpy(znet->tx_cur, cfblk, sizeof(struct i82593_conf_block));
	znet->tx_cur += sizeof(struct i82593_conf_block)/2;
	outb(OP0_CONFIGURE | CR0_CHNL, ioaddr);

	/* XXX FIXME maz : Add multicast addresses here, so having a
	 * multicast address configured isn't equal to IFF_ALLMULTI */
}

static const struct net_device_ops znet_netdev_ops = {
	.ndo_open		= znet_open,
	.ndo_stop		= znet_close,
	.ndo_start_xmit		= znet_send_packet,
	.ndo_set_multicast_list	= znet_set_multicast_list,
	.ndo_tx_timeout		= znet_tx_timeout,
	.ndo_change_mtu		= eth_change_mtu,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
};

/* The Z-Note probe is pretty easy.  The NETIDBLK exists in the safe-to-probe
   BIOS area.  We just scan for the signature, and pull the vital parameters
   out of the structure.  Allocates the netdev and both DMA buffers, and
   registers the device.  Returns 0 or a negative errno. */
static int __init znet_probe (void)
{
	int i;
	struct netidblk *netinfo;
	struct znet_private *znet;
	struct net_device *dev;
	char *p;
	int err = -ENOMEM;

	/* This code scans the region 0xf0000 to 0xfffff for a "NETIDBLK". */
	for(p = (char *)phys_to_virt(0xf0000); p < (char *)phys_to_virt(0x100000); p++)
		if (*p == 'N' && strncmp(p, "NETIDBLK", 8) == 0)
			break;

	if (p >= (char *)phys_to_virt(0x100000)) {
		if (znet_debug > 1)
			printk(KERN_INFO "No Z-Note ethernet adaptor found.\n");
		return -ENODEV;
	}

	dev = alloc_etherdev(sizeof(struct znet_private));
	if (!dev)
		return -ENOMEM;

	znet = netdev_priv(dev);

	netinfo = (struct netidblk *)p;
	dev->base_addr = netinfo->iobase1;
	dev->irq = netinfo->irq1;

	/* The station address is in the "netidblk" at 0x0f0000. */
	for (i = 0; i < 6; i++)
		dev->dev_addr[i] = netinfo->netid[i];

	printk(KERN_INFO "%s: ZNET at %#3lx, %pM"
	       ", using IRQ %d DMA %d and %d.\n",
	       dev->name, dev->base_addr, dev->dev_addr, dev->irq,
	       netinfo->dma1, netinfo->dma2);

	if (znet_debug > 1) {
		printk(KERN_INFO "%s: vendor '%16.16s' IRQ1 %d IRQ2 %d DMA1 %d DMA2 %d.\n",
		       dev->name, netinfo->vendor,
		       netinfo->irq1, netinfo->irq2,
		       netinfo->dma1, netinfo->dma2);
		printk(KERN_INFO "%s: iobase1 %#x size %d iobase2 %#x size %d net type %2.2x.\n",
		       dev->name, netinfo->iobase1, netinfo->iosize1,
		       netinfo->iobase2, netinfo->iosize2, netinfo->nettype);
	}

	if (znet_debug > 0)
		printk(KERN_INFO "%s", version);

	znet->rx_dma = netinfo->dma1;
	znet->tx_dma = netinfo->dma2;
	spin_lock_init(&znet->lock);
	znet->sia_base = 0xe6;	/* Magic address for the 82501 SIA */
	znet->sia_size = 2;
	/* maz: Despite the '593 being advertised above as using a
	 * single 8bits I/O port, this driver does many 16bits
	 * access. So set io_size accordingly */
	znet->io_size = 2;

	/* Both rings must live in DMA-reachable memory and must not cross
	   a 128K boundary (checked via dma_page_eq below). */
	if (!(znet->rx_start = kmalloc (DMA_BUF_SIZE, GFP_KERNEL | GFP_DMA)))
		goto free_dev;
	if (!(znet->tx_start = kmalloc (DMA_BUF_SIZE, GFP_KERNEL | GFP_DMA)))
		goto free_rx;

	if (!dma_page_eq (znet->rx_start, znet->rx_start + (RX_BUF_SIZE/2-1)) ||
	    !dma_page_eq (znet->tx_start, znet->tx_start + (TX_BUF_SIZE/2-1))) {
		printk (KERN_WARNING "tx/rx crossing DMA frontiers, giving up\n");
		goto free_tx;
	}

	znet->rx_end = znet->rx_start + RX_BUF_SIZE/2;
	znet->tx_buf_len = TX_BUF_SIZE/2;
	znet->tx_end = znet->tx_start + znet->tx_buf_len;

	/* The ZNET-specific entries in the device structure. */
	dev->netdev_ops = &znet_netdev_ops;
	dev->watchdog_timeo = TX_TIMEOUT;
	err = register_netdev(dev);
	if (err)
		goto free_tx;
	znet_dev = dev;
	return 0;

 free_tx:
	kfree(znet->tx_start);
 free_rx:
	kfree(znet->rx_start);
 free_dev:
	free_netdev(dev);
	return err;
}

/* ndo_open: grab resources, power the transceiver, init the chip and
   start the Tx queue. */
static int znet_open(struct net_device *dev)
{
	int ioaddr = dev->base_addr;

	if (znet_debug > 2)
		printk(KERN_DEBUG "%s: znet_open() called.\n", dev->name);

	/* These should never fail.  You can't add devices to a sealed box! */
	if (znet_request_resources (dev)) {
		printk(KERN_WARNING "%s: Not opened -- resource busy?!?\n", dev->name);
		return -EBUSY;
	}

	znet_transceiver_power (dev, 1);

	/* According to the Crynwr driver we should wait 50 msec. for the
	   LAN clock to stabilize.  My experiments indicates that the '593 can
	   be initialized immediately.  The delay is probably needed for the
	   DC-to-DC converter to come up to full voltage, and for the oscillator
	   to be spot-on at 20Mhz before transmitting.
	   Until this proves to be a problem we rely on the higher layers for the
	   delay and save allocating a timer entry. */

	/* maz : Well, I'm getting every time the following message
	 * without the delay on a 486@33. This machine is much too
	 * fast... :-) So maybe the Crynwr driver wasn't wrong after
	 * all, even if the message is completely harmless on my
	 * setup. */
	mdelay (50);

	/* This follows the packet driver's lead, and checks for success. */
	if (inb(ioaddr) != 0x10 && inb(ioaddr) != 0x00)
		printk(KERN_WARNING "%s: Problem turning on the transceiver power.\n",
		       dev->name);

	hardware_init(dev);
	netif_start_queue (dev);

	return 0;
}

/* Watchdog hook: dump the four chip status registers, then reset and
   re-initialize the hardware and wake the queue. */
static void znet_tx_timeout (struct net_device *dev)
{
	int ioaddr = dev->base_addr;
	ushort event, tx_status, rx_offset, state;

	outb (CR0_STATUS_0, ioaddr);
	event = inb (ioaddr);
	outb (CR0_STATUS_1, ioaddr);
	tx_status = inw (ioaddr);
	outb (CR0_STATUS_2, ioaddr);
	rx_offset = inw (ioaddr);
	outb (CR0_STATUS_3, ioaddr);
	state = inb (ioaddr);
	printk (KERN_WARNING "%s: transmit timed out, status %02x %04x %04x %02x,"
	 " resetting.\n", dev->name, event, tx_status, rx_offset, state);
	if (tx_status == TX_LOST_CRS)
		printk (KERN_WARNING "%s: Tx carrier error, check transceiver cable.\n",
			dev->name);
	outb (OP0_RESET, ioaddr);
	hardware_init (dev);
	netif_wake_queue (dev);
}

/* ndo_start_xmit: copy the frame into the circular Tx DMA ring (with
   wrap-around handling) and issue a TRANSMIT command.  Always consumes
   the skb and returns NETDEV_TX_OK. */
static netdev_tx_t znet_send_packet(struct sk_buff *skb, struct net_device *dev)
{
	int ioaddr = dev->base_addr;
	struct znet_private *znet = netdev_priv(dev);
	unsigned long flags;
	short length = skb->len;

	if (znet_debug > 4)
		printk(KERN_DEBUG "%s: ZNet_send_packet.\n", dev->name);

	/* Pad short frames up to the ethernet minimum. */
	if (length < ETH_ZLEN) {
		if (skb_padto(skb, ETH_ZLEN))
			return NETDEV_TX_OK;
		length = ETH_ZLEN;
	}

	netif_stop_queue (dev);

	/* Check that the part hasn't reset itself, probably from suspend. */
	outb(CR0_STATUS_0, ioaddr);
	if (inw(ioaddr) == 0x0010 &&
	    inw(ioaddr) == 0x0000 &&
	    inw(ioaddr) == 0x0010) {
		if (znet_debug > 1)
			printk (KERN_WARNING "%s : waking up\n", dev->name);
		hardware_init(dev);
		znet_transceiver_power (dev, 1);
	}

	if (1) {
		unsigned char *buf = (void *)skb->data;
		/* tx_link points at the word terminating the previous
		   command; it is rewritten below to chain this one in. */
		ushort *tx_link = znet->tx_cur - 1;
		ushort rnd_len = (length + 1)>>1;	/* length in words */

		dev->stats.tx_bytes+=length;

		if (znet->tx_cur >= znet->tx_end)
			znet->tx_cur = znet->tx_start;
		*znet->tx_cur++ = length;
		if (znet->tx_cur + rnd_len + 1 > znet->tx_end) {
			/* Frame wraps: split the copy at the ring end. */
			int semi_cnt = (znet->tx_end - znet->tx_cur)<<1; /* Cvrt to byte cnt. */
			memcpy(znet->tx_cur, buf, semi_cnt);
			rnd_len -= semi_cnt>>1;
			memcpy(znet->tx_start, buf + semi_cnt, length - semi_cnt);
			znet->tx_cur = znet->tx_start + rnd_len;
		} else {
			memcpy(znet->tx_cur, buf, skb->len);
			znet->tx_cur += rnd_len;
		}
		*znet->tx_cur++ = 0;	/* terminator word */

		spin_lock_irqsave(&znet->lock, flags);
		{
			*tx_link = OP0_TRANSMIT | CR0_CHNL;
			/* Is this always safe to do? */
			outb(OP0_TRANSMIT | CR0_CHNL, ioaddr);
		}
		spin_unlock_irqrestore (&znet->lock, flags);

		dev->trans_start = jiffies;
		netif_start_queue (dev);

		if (znet_debug > 4)
			printk(KERN_DEBUG "%s: Transmitter queued, length %d.\n", dev->name, length);
	}
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}

/* The ZNET interrupt handler.
*/ static irqreturn_t znet_interrupt(int irq, void *dev_id) { struct net_device *dev = dev_id; struct znet_private *znet = netdev_priv(dev); int ioaddr; int boguscnt = 20; int handled = 0; spin_lock (&znet->lock); ioaddr = dev->base_addr; outb(CR0_STATUS_0, ioaddr); do { ushort status = inb(ioaddr); if (znet_debug > 5) { ushort result, rx_ptr, running; outb(CR0_STATUS_1, ioaddr); result = inw(ioaddr); outb(CR0_STATUS_2, ioaddr); rx_ptr = inw(ioaddr); outb(CR0_STATUS_3, ioaddr); running = inb(ioaddr); printk(KERN_DEBUG "%s: interrupt, status %02x, %04x %04x %02x serial %d.\n", dev->name, status, result, rx_ptr, running, boguscnt); } if ((status & SR0_INTERRUPT) == 0) break; handled = 1; if ((status & SR0_EVENT_MASK) == SR0_TRANSMIT_DONE || (status & SR0_EVENT_MASK) == SR0_RETRANSMIT_DONE || (status & SR0_EVENT_MASK) == SR0_TRANSMIT_NO_CRC_DONE) { int tx_status; outb(CR0_STATUS_1, ioaddr); tx_status = inw(ioaddr); /* It's undocumented, but tx_status seems to match the i82586. */ if (tx_status & TX_OK) { dev->stats.tx_packets++; dev->stats.collisions += tx_status & TX_NCOL_MASK; } else { if (tx_status & (TX_LOST_CTS | TX_LOST_CRS)) dev->stats.tx_carrier_errors++; if (tx_status & TX_UND_RUN) dev->stats.tx_fifo_errors++; if (!(tx_status & TX_HRT_BEAT)) dev->stats.tx_heartbeat_errors++; if (tx_status & TX_MAX_COL) dev->stats.tx_aborted_errors++; /* ...and the catch-all. */ if ((tx_status | (TX_LOST_CRS | TX_LOST_CTS | TX_UND_RUN | TX_HRT_BEAT | TX_MAX_COL)) != (TX_LOST_CRS | TX_LOST_CTS | TX_UND_RUN | TX_HRT_BEAT | TX_MAX_COL)) dev->stats.tx_errors++; /* Transceiver may be stuck if cable * was removed while emiting a * packet. Flip it off, then on to * reset it. This is very empirical, * but it seems to work. */ znet_transceiver_power (dev, 0); znet_transceiver_power (dev, 1); } netif_wake_queue (dev); } if ((status & SR0_RECEPTION) || (status & SR0_EVENT_MASK) == SR0_STOP_REG_HIT) { znet_rx(dev); } /* Clear the interrupts we've handled. 
*/ outb(CR0_INT_ACK, ioaddr); } while (boguscnt--); spin_unlock (&znet->lock); return IRQ_RETVAL(handled); } static void znet_rx(struct net_device *dev) { struct znet_private *znet = netdev_priv(dev); int ioaddr = dev->base_addr; int boguscount = 1; short next_frame_end_offset = 0; /* Offset of next frame start. */ short *cur_frame_end; short cur_frame_end_offset; outb(CR0_STATUS_2, ioaddr); cur_frame_end_offset = inw(ioaddr); if (cur_frame_end_offset == znet->rx_cur - znet->rx_start) { printk(KERN_WARNING "%s: Interrupted, but nothing to receive, offset %03x.\n", dev->name, cur_frame_end_offset); return; } /* Use same method as the Crynwr driver: construct a forward list in the same area of the backwards links we now have. This allows us to pass packets to the upper layers in the order they were received -- important for fast-path sequential operations. */ while (znet->rx_start + cur_frame_end_offset != znet->rx_cur && ++boguscount < 5) { unsigned short hi_cnt, lo_cnt, hi_status, lo_status; int count, status; if (cur_frame_end_offset < 4) { /* Oh no, we have a special case: the frame trailer wraps around the end of the ring buffer. We've saved space at the end of the ring buffer for just this problem. 
*/ memcpy(znet->rx_end, znet->rx_start, 8); cur_frame_end_offset += (RX_BUF_SIZE/2); } cur_frame_end = znet->rx_start + cur_frame_end_offset - 4; lo_status = *cur_frame_end++; hi_status = *cur_frame_end++; status = ((hi_status & 0xff) << 8) + (lo_status & 0xff); lo_cnt = *cur_frame_end++; hi_cnt = *cur_frame_end++; count = ((hi_cnt & 0xff) << 8) + (lo_cnt & 0xff); if (znet_debug > 5) printk(KERN_DEBUG "Constructing trailer at location %03x, %04x %04x %04x %04x" " count %#x status %04x.\n", cur_frame_end_offset<<1, lo_status, hi_status, lo_cnt, hi_cnt, count, status); cur_frame_end[-4] = status; cur_frame_end[-3] = next_frame_end_offset; cur_frame_end[-2] = count; next_frame_end_offset = cur_frame_end_offset; cur_frame_end_offset -= ((count + 1)>>1) + 3; if (cur_frame_end_offset < 0) cur_frame_end_offset += RX_BUF_SIZE/2; }; /* Now step forward through the list. */ do { ushort *this_rfp_ptr = znet->rx_start + next_frame_end_offset; int status = this_rfp_ptr[-4]; int pkt_len = this_rfp_ptr[-2]; if (znet_debug > 5) printk(KERN_DEBUG "Looking at trailer ending at %04x status %04x length %03x" " next %04x.\n", next_frame_end_offset<<1, status, pkt_len, this_rfp_ptr[-3]<<1); /* Once again we must assume that the i82586 docs apply. */ if ( ! (status & RX_RCV_OK)) { /* There was an error. */ dev->stats.rx_errors++; if (status & RX_CRC_ERR) dev->stats.rx_crc_errors++; if (status & RX_ALG_ERR) dev->stats.rx_frame_errors++; #if 0 if (status & 0x0200) dev->stats.rx_over_errors++; /* Wrong. */ if (status & 0x0100) dev->stats.rx_fifo_errors++; #else /* maz : Wild guess... */ if (status & RX_OVRRUN) dev->stats.rx_over_errors++; #endif if (status & RX_SRT_FRM) dev->stats.rx_length_errors++; } else if (pkt_len > 1536) { dev->stats.rx_length_errors++; } else { /* Malloc up new buffer. 
*/ struct sk_buff *skb; skb = dev_alloc_skb(pkt_len); if (skb == NULL) { if (znet_debug) printk(KERN_WARNING "%s: Memory squeeze, dropping packet.\n", dev->name); dev->stats.rx_dropped++; break; } if (&znet->rx_cur[(pkt_len+1)>>1] > znet->rx_end) { int semi_cnt = (znet->rx_end - znet->rx_cur)<<1; memcpy(skb_put(skb,semi_cnt), znet->rx_cur, semi_cnt); memcpy(skb_put(skb,pkt_len-semi_cnt), znet->rx_start, pkt_len - semi_cnt); } else { memcpy(skb_put(skb,pkt_len), znet->rx_cur, pkt_len); if (znet_debug > 6) { unsigned int *packet = (unsigned int *) skb->data; printk(KERN_DEBUG "Packet data is %08x %08x %08x %08x.\n", packet[0], packet[1], packet[2], packet[3]); } } skb->protocol=eth_type_trans(skb,dev); netif_rx(skb); dev->stats.rx_packets++; dev->stats.rx_bytes += pkt_len; } znet->rx_cur = this_rfp_ptr; if (znet->rx_cur >= znet->rx_end) znet->rx_cur -= RX_BUF_SIZE/2; update_stop_hit(ioaddr, (znet->rx_cur - znet->rx_start)<<1); next_frame_end_offset = this_rfp_ptr[-3]; if (next_frame_end_offset == 0) /* Read all the frames? */ break; /* Done for now */ this_rfp_ptr = znet->rx_start + next_frame_end_offset; } while (--boguscount); /* If any worth-while packets have been received, dev_rint() has done a mark_bh(INET_BH) for us and will work on them when we get to the bottom-half routine. */ return; } /* The inverse routine to znet_open(). */ static int znet_close(struct net_device *dev) { int ioaddr = dev->base_addr; netif_stop_queue (dev); outb(OP0_RESET, ioaddr); /* CMD0_RESET */ if (znet_debug > 1) printk(KERN_DEBUG "%s: Shutting down ethercard.\n", dev->name); /* Turn off transceiver power. 
*/ znet_transceiver_power (dev, 0); znet_release_resources (dev); return 0; } static void show_dma(struct net_device *dev) { short ioaddr = dev->base_addr; unsigned char stat = inb (ioaddr); struct znet_private *znet = netdev_priv(dev); unsigned long flags; short dma_port = ((znet->tx_dma&3)<<2) + IO_DMA2_BASE; unsigned addr = inb(dma_port); short residue; addr |= inb(dma_port) << 8; residue = get_dma_residue(znet->tx_dma); if (znet_debug > 1) { flags=claim_dma_lock(); printk(KERN_DEBUG "Stat:%02x Addr: %04x cnt:%3x\n", stat, addr<<1, residue); release_dma_lock(flags); } } /* Initialize the hardware. We have to do this when the board is open()ed or when we come out of suspend mode. */ static void hardware_init(struct net_device *dev) { unsigned long flags; short ioaddr = dev->base_addr; struct znet_private *znet = netdev_priv(dev); znet->rx_cur = znet->rx_start; znet->tx_cur = znet->tx_start; /* Reset the chip, and start it up. */ outb(OP0_RESET, ioaddr); flags=claim_dma_lock(); disable_dma(znet->rx_dma); /* reset by an interrupting task. */ clear_dma_ff(znet->rx_dma); set_dma_mode(znet->rx_dma, DMA_RX_MODE); set_dma_addr(znet->rx_dma, (unsigned int) znet->rx_start); set_dma_count(znet->rx_dma, RX_BUF_SIZE); enable_dma(znet->rx_dma); /* Now set up the Tx channel. */ disable_dma(znet->tx_dma); clear_dma_ff(znet->tx_dma); set_dma_mode(znet->tx_dma, DMA_TX_MODE); set_dma_addr(znet->tx_dma, (unsigned int) znet->tx_start); set_dma_count(znet->tx_dma, znet->tx_buf_len<<1); enable_dma(znet->tx_dma); release_dma_lock(flags); if (znet_debug > 1) printk(KERN_DEBUG "%s: Initializing the i82593, rx buf %p tx buf %p\n", dev->name, znet->rx_start,znet->tx_start); /* Do an empty configure command, just like the Crynwr driver. This resets to chip to its default values. 
*/ *znet->tx_cur++ = 0; *znet->tx_cur++ = 0; show_dma(dev); outb(OP0_CONFIGURE | CR0_CHNL, ioaddr); znet_set_multicast_list (dev); *znet->tx_cur++ = 6; memcpy(znet->tx_cur, dev->dev_addr, 6); znet->tx_cur += 3; show_dma(dev); outb(OP0_IA_SETUP | CR0_CHNL, ioaddr); show_dma(dev); update_stop_hit(ioaddr, 8192); if (znet_debug > 1) printk(KERN_DEBUG "enabling Rx.\n"); outb(OP0_RCV_ENABLE, ioaddr); netif_start_queue (dev); } static void update_stop_hit(short ioaddr, unsigned short rx_stop_offset) { outb(OP0_SWIT_TO_PORT_1 | CR0_CHNL, ioaddr); if (znet_debug > 5) printk(KERN_DEBUG "Updating stop hit with value %02x.\n", (rx_stop_offset >> 6) | CR1_STOP_REG_UPDATE); outb((rx_stop_offset >> 6) | CR1_STOP_REG_UPDATE, ioaddr); outb(OP1_SWIT_TO_PORT_0, ioaddr); } static __exit void znet_cleanup (void) { if (znet_dev) { struct znet_private *znet = netdev_priv(znet_dev); unregister_netdev (znet_dev); kfree (znet->rx_start); kfree (znet->tx_start); free_netdev (znet_dev); } } module_init (znet_probe); module_exit (znet_cleanup); class='del'>- }
-
- *bhp = bh;
-
- return 0;
-}
-
-/**
- * gfs2_copy2mem - Trivial copy function for gfs2_jdata_read()
- * @bh: The buffer to copy from, or NULL meaning zero the buffer
- * @buf: The buffer to copy/zero
- * @offset: The offset in the buffer to copy from
- * @size: The amount of data to copy/zero
- *
- * Returns: errno
- */
-
-int gfs2_copy2mem(struct buffer_head *bh, char **buf, unsigned int offset,
- unsigned int size)
-{
- if (bh)
- memcpy(*buf, bh->b_data + offset, size);
- else
- memset(*buf, 0, size);
- *buf += size;
- return 0;
-}
-
-/**
- * gfs2_copy2user - Copy bytes to user space for gfs2_jdata_read()
- * @bh: The buffer
- * @buf: The destination of the data
- * @offset: The offset into the buffer
- * @size: The amount of data to copy
- *
- * Returns: errno
- */
-
-int gfs2_copy2user(struct buffer_head *bh, char **buf, unsigned int offset,
- unsigned int size)
-{
- int error;
-
- if (bh)
- error = copy_to_user(*buf, bh->b_data + offset, size);
- else
- error = clear_user(*buf, size);
-
- if (error)
- error = -EFAULT;
- else
- *buf += size;
-
- return error;
-}
-
-static int jdata_read_stuffed(struct gfs2_inode *ip, char *buf,
- unsigned int offset, unsigned int size,
- read_copy_fn_t copy_fn)
-{
- struct buffer_head *dibh;
- int error;
-
- error = gfs2_meta_inode_buffer(ip, &dibh);
- if (!error) {
- error = copy_fn(dibh, &buf,
- offset + sizeof(struct gfs2_dinode), size);
- brelse(dibh);
- }
-
- return (error) ? error : size;
-}
-
-/**
- * gfs2_jdata_read - Read a jdata file
- * @ip: The GFS2 Inode
- * @buf: The buffer to place result into
- * @offset: File offset to begin jdata_readng from
- * @size: Amount of data to transfer
- * @copy_fn: Function to actually perform the copy
- *
- * The @copy_fn only copies a maximum of a single block at once so
- * we are safe calling it with int arguments. It is done so that
- * we don't needlessly put 64bit arguments on the stack and it
- * also makes the code in the @copy_fn nicer too.
- *
- * Returns: The amount of data actually copied or the error
- */
-
-int gfs2_jdata_read(struct gfs2_inode *ip, char __user *buf, uint64_t offset,
- unsigned int size, read_copy_fn_t copy_fn)
-{
- struct gfs2_sbd *sdp = ip->i_sbd;
- uint64_t lblock, dblock;
- uint32_t extlen = 0;
- unsigned int o;
- int copied = 0;
- int error = 0;
-
- if (offset >= ip->i_di.di_size)
- return 0;
-
- if ((offset + size) > ip->i_di.di_size)
- size = ip->i_di.di_size - offset;
-
- if (!size)
- return 0;
-
- if (gfs2_is_stuffed(ip))
- return jdata_read_stuffed(ip, buf, (unsigned int)offset, size,
- copy_fn);
-
- if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip)))
- return -EINVAL;
-
- lblock = offset;
- o = do_div(lblock, sdp->sd_jbsize) +
- sizeof(struct gfs2_meta_header);
-
- while (copied < size) {
- unsigned int amount;
- struct buffer_head *bh;
- int new;
-
- amount = size - copied;
- if (amount > sdp->sd_sb.sb_bsize - o)
- amount = sdp->sd_sb.sb_bsize - o;
-
- if (!extlen) {
- new = 0;
- error = gfs2_block_map(ip, lblock, &new,
- &dblock, &extlen);
- if (error)
- goto fail;
- }
-
- if (extlen > 1)
- gfs2_meta_ra(ip->i_gl, dblock, extlen);
-
- if (dblock) {
- error = gfs2_jdata_get_buffer(ip, dblock, new, &bh);
- if (error)
- goto fail;
- dblock++;
- extlen--;
- } else
- bh = NULL;
-
- error = copy_fn(bh, &buf, o, amount);
- brelse(bh);
- if (error)
- goto fail;
-
- copied += amount;
- lblock++;
-
- o = sizeof(struct gfs2_meta_header);
- }
-
- return copied;
-
- fail:
- return (copied) ? copied : error;
-}
-
-/**
- * gfs2_copy_from_mem - Trivial copy function for gfs2_jdata_write()
- * @bh: The buffer to copy to or clear
- * @buf: The buffer to copy from
- * @offset: The offset in the buffer to write to
- * @size: The amount of data to write
- *
- * Returns: errno
- */
-
-int gfs2_copy_from_mem(struct gfs2_inode *ip, struct buffer_head *bh,
- const char **buf, unsigned int offset, unsigned int size)
-{
- gfs2_trans_add_bh(ip->i_gl, bh, 1);
- memcpy(bh->b_data + offset, *buf, size);
-
- *buf += size;
-
- return 0;
-}
-
-/**
- * gfs2_copy_from_user - Copy bytes from user space for gfs2_jdata_write()
- * @bh: The buffer to copy to or clear
- * @buf: The buffer to copy from
- * @offset: The offset in the buffer to write to
- * @size: The amount of data to write
- *
- * Returns: errno
- */
-
-int gfs2_copy_from_user(struct gfs2_inode *ip, struct buffer_head *bh,
- const char __user **buf, unsigned int offset, unsigned int size)
-{
- int error = 0;
-
- gfs2_trans_add_bh(ip->i_gl, bh, 1);
- if (copy_from_user(bh->b_data + offset, *buf, size))
- error = -EFAULT;
- else
- *buf += size;
-
- return error;
-}
-
-static int jdata_write_stuffed(struct gfs2_inode *ip, char *buf,
- unsigned int offset, unsigned int size,
- write_copy_fn_t copy_fn)
-{
- struct buffer_head *dibh;
- int error;
-
- error = gfs2_meta_inode_buffer(ip, &dibh);
- if (error)
- return error;
-
- error = copy_fn(ip,
- dibh, &buf,
- offset + sizeof(struct gfs2_dinode), size);
- if (!error) {
- if (ip->i_di.di_size < offset + size)
- ip->i_di.di_size = offset + size;
- ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
- gfs2_dinode_out(&ip->i_di, dibh->b_data);
- }
-
- brelse(dibh);
-
- return (error) ? error : size;
-}
-
-/**
- * gfs2_jdata_write - Write bytes to a file
- * @ip: The GFS2 inode
- * @buf: The buffer containing information to be written
- * @offset: The file offset to start writing at
- * @size: The amount of data to write
- * @copy_fn: Function to do the actual copying
- *
- * Returns: The number of bytes correctly written or error code
- */
-
-int gfs2_jdata_write(struct gfs2_inode *ip, const char __user *buf, uint64_t offset,
- unsigned int size, write_copy_fn_t copy_fn)
-{
- struct gfs2_sbd *sdp = ip->i_sbd;
- struct buffer_head *dibh;
- uint64_t lblock, dblock;
- uint32_t extlen = 0;
- unsigned int o;
- int copied = 0;
- int error = 0;
-
- if (!size)
- return 0;
-
- if (gfs2_is_stuffed(ip) &&
- offset + size <= sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode))
- return jdata_write_stuffed(ip, buf, (unsigned int)offset, size,
- copy_fn);
-
- if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip)))
- return -EINVAL;
-
- if (gfs2_is_stuffed(ip)) {
- error = gfs2_unstuff_dinode(ip, NULL, NULL);
- if (error)
- return error;
- }
-
- lblock = offset;
- o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header);
-
- while (copied < size) {
- unsigned int amount;
- struct buffer_head *bh;
- int new;
-
- amount = size - copied;
- if (amount > sdp->sd_sb.sb_bsize - o)
- amount = sdp->sd_sb.sb_bsize - o;
-
- if (!extlen) {
- new = 1;
- error = gfs2_block_map(ip, lblock, &new,
- &dblock, &extlen);
- if (error)
- goto fail;
- error = -EIO;
- if (gfs2_assert_withdraw(sdp, dblock))
- goto fail;
- }
-
- error = gfs2_jdata_get_buffer(ip, dblock,
- (amount == sdp->sd_jbsize) ? 1 : new,
- &bh);
- if (error)
- goto fail;
-
- error = copy_fn(ip, bh, &buf, o, amount);
- brelse(bh);
- if (error)
- goto fail;
-
- copied += amount;
- lblock++;
- dblock++;
- extlen--;
-
- o = sizeof(struct gfs2_meta_header);
- }
-
- out:
- error = gfs2_meta_inode_buffer(ip, &dibh);
- if (error)
- return error;
-
- if (ip->i_di.di_size < offset + copied)
- ip->i_di.di_size = offset + copied;
- ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
-
- gfs2_trans_add_bh(ip->i_gl, dibh, 1);
- gfs2_dinode_out(&ip->i_di, dibh->b_data);
- brelse(dibh);
-
- return copied;
-
- fail:
- if (copied)
- goto out;
- return error;
-}
-
diff --git a/fs/gfs2/jdata.h b/fs/gfs2/jdata.h
deleted file mode 100644
index 95e18fcb8f82..000000000000
--- a/fs/gfs2/jdata.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
- * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License v.2.
- */
-
-#ifndef __FILE_DOT_H__
-#define __FILE_DOT_H__
-
-int gfs2_jdata_get_buffer(struct gfs2_inode *ip, uint64_t block, int new,
- struct buffer_head **bhp);
-
-typedef int (*read_copy_fn_t) (struct buffer_head *bh, char **buf,
- unsigned int offset, unsigned int size);
-typedef int (*write_copy_fn_t) (struct gfs2_inode *ip,
- struct buffer_head *bh, const char **buf,
- unsigned int offset, unsigned int size);
-
-int gfs2_copy2mem(struct buffer_head *bh, char **buf,
- unsigned int offset, unsigned int size);
-int gfs2_copy2user(struct buffer_head *bh, char __user **buf,
- unsigned int offset, unsigned int size);
-int gfs2_jdata_read(struct gfs2_inode *ip, char __user *buf,
- uint64_t offset, unsigned int size,
- read_copy_fn_t copy_fn);
-
-int gfs2_copy_from_mem(struct gfs2_inode *ip,
- struct buffer_head *bh, const char **buf,
- unsigned int offset, unsigned int size);
-int gfs2_copy_from_user(struct gfs2_inode *ip,
- struct buffer_head *bh, const char __user **buf,
- unsigned int offset, unsigned int size);
-int gfs2_jdata_write(struct gfs2_inode *ip, const char __user *buf,
- uint64_t offset, unsigned int size,
- write_copy_fn_t copy_fn);
-
-static inline int gfs2_jdata_read_mem(struct gfs2_inode *ip, char *buf,
- uint64_t offset, unsigned int size)
-{
- return gfs2_jdata_read(ip, (__force char __user *)buf, offset, size, gfs2_copy2mem);
-}
-
-static inline int gfs2_jdata_write_mem(struct gfs2_inode *ip, const char *buf,
- uint64_t offset, unsigned int size)
-{
- return gfs2_jdata_write(ip, (__force const char __user *)buf, offset, size, gfs2_copy_from_mem);
-}
-
-#endif /* __FILE_DOT_H__ */
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index f6d00130f96f..9b4484d366ca 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -387,8 +387,7 @@ struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
bh = lb->lb_bh = alloc_buffer_head(GFP_NOFS | __GFP_NOFAIL);
atomic_set(&bh->b_count, 1);
bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate);
- set_bh_page(bh, virt_to_page(real->b_data),
- ((unsigned long)real->b_data) & (PAGE_SIZE - 1));
+ set_bh_page(bh, real->b_page, bh_offset(real));
bh->b_blocknr = blkno;
bh->b_size = sdp->sd_sb.sb_bsize;
bh->b_bdev = sdp->sd_vfs->s_bdev;
@@ -634,6 +633,7 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp)
gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved);
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_gl);
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_buf);
+ gfs2_assert_withdraw(sdp, !sdp->sd_log_num_jdata);
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg);
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index a065f7667238..dd41863810d7 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -428,49 +428,188 @@ static void rg_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
gfs2_assert_warn(sdp, !sdp->sd_log_num_rg);
}
+/**
+ * databuf_lo_add - Add a databuf to the transaction.
+ *
+ * This is used in two distinct cases:
+ * i) In ordered write mode
+ * We put the data buffer on a list so that we can ensure that it is
+ * synced to disk at the right time
+ * ii) In journaled data mode
+ * We need to journal the data block in the same way as metadata in
+ * the functions above. The difference is that here we have a tag
+ * which is two __be64's being the block number (as per meta data)
+ * and a flag which says whether the data block needs escaping or
+ * not. This means we need a new log entry for each 251 or so data
+ * blocks, which isn't an enormous overhead but twice as much as
+ * for normal metadata blocks.
+ */
static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
- get_transaction->tr_touched = 1;
+ struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
+ struct gfs2_trans *tr = get_transaction;
+ struct address_space *mapping = bd->bd_bh->b_page->mapping;
+ struct gfs2_inode *ip = get_v2ip(mapping->host);
+ tr->tr_touched = 1;
+ if (!list_empty(&bd->bd_list_tr) &&
+ (ip->i_di.di_flags & GFS2_DIF_JDATA)) {
+ tr->tr_num_buf++;
+ gfs2_trans_add_gl(bd->bd_gl);
+ list_add(&bd->bd_list_tr, &tr->tr_list_buf);
+ gfs2_pin(sdp, bd->bd_bh);
+ } else {
+ clear_buffer_pinned(bd->bd_bh);
+ }
gfs2_log_lock(sdp);
+ if (ip->i_di.di_flags & GFS2_DIF_JDATA)
+ sdp->sd_log_num_jdata++;
sdp->sd_log_num_databuf++;
list_add(&le->le_list, &sdp->sd_log_le_databuf);
gfs2_log_unlock(sdp);
}
+static int gfs2_check_magic(struct buffer_head *bh)
+{
+ struct page *page = bh->b_page;
+ void *kaddr;
+ __be32 *ptr;
+ int rv = 0;
+
+ kaddr = kmap_atomic(page, KM_USER0);
+ ptr = kaddr + bh_offset(bh);
+ if (*ptr == cpu_to_be32(GFS2_MAGIC))
+ rv = 1;
+ kunmap_atomic(page, KM_USER0);
+
+ return rv;
+}
+
+/**
+ * databuf_lo_before_commit - Scan the data buffers, writing as we go
+ *
+ * Here we scan through the lists of buffers and make the assumption
+ * that any buffer that's been pinned is being journaled, and that
+ * any unpinned buffer is an ordered write data buffer and therefore
+ * will be written back rather than journaled.
+ */
static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
{
- struct list_head *head = &sdp->sd_log_le_databuf;
LIST_HEAD(started);
- struct gfs2_bufdata *bd;
- struct buffer_head *bh;
+ struct gfs2_bufdata *bd1 = NULL, *bd2, *bdt;
+ struct buffer_head *bh = NULL;
+ unsigned int offset = sizeof(struct gfs2_log_descriptor);
+ struct gfs2_log_descriptor *ld;
+ unsigned int limit;
+ unsigned int total_dbuf = sdp->sd_log_num_databuf;
+ unsigned int total_jdata = sdp->sd_log_num_jdata;
+ unsigned int num, n;
+ __be64 *ptr;
- while (!list_empty(head)) {
- bd = list_entry(head->prev, struct gfs2_bufdata, bd_le.le_list);
- list_move(&bd->bd_le.le_list, &started);
+ offset += (2*sizeof(__be64) - 1);
+ offset &= ~(2*sizeof(__be64) - 1);
+ limit = (sdp->sd_sb.sb_bsize - offset)/sizeof(__be64);
- gfs2_log_lock(sdp);
- bh = bd->bd_bh;
+ /* printk(KERN_INFO "totals: jdata=%u dbuf=%u\n", total_jdata, total_dbuf); */
+ /*
+ * Start writing ordered buffers, write journaled buffers
+ * into the log along with a header
+ */
+ bd2 = bd1 = list_prepare_entry(bd1, &sdp->sd_log_le_databuf, bd_le.le_list);
+ while(total_dbuf) {
+ num = total_jdata;
+ if (num > limit)
+ num = limit;
+ n = 0;
+ list_for_each_entry_safe_continue(bd1, bdt, &sdp->sd_log_le_databuf, bd_le.le_list) {
+ gfs2_log_lock(sdp);
+ /* An ordered write buffer */
+ if (bd1->bd_bh && !buffer_pinned(bd1->bd_bh)) {
+ list_move(&bd1->bd_le.le_list, &started);
+ if (bd1 == bd2) {
+ bd2 = NULL;
+ bd2 = list_prepare_entry(bd2, &sdp->sd_log_le_databuf, bd_le.le_list);
+ }
+ total_dbuf--;
+ if (bd1->bd_bh) {
+ get_bh(bd1->bd_bh);
+ gfs2_log_unlock(sdp);
+ if (buffer_dirty(bd1->bd_bh)) {
+ wait_on_buffer(bd1->bd_bh);
+ ll_rw_block(WRITE, 1, &bd1->bd_bh);
+ }
+ brelse(bd1->bd_bh);
+ continue;
+ }
+ gfs2_log_unlock(sdp);
+ continue;
+ } else if (bd1->bd_bh) { /* A journaled buffer */
+ int magic;
+ gfs2_log_unlock(sdp);
+ /* printk(KERN_INFO "journaled buffer\n"); */
+ if (!bh) {
+ bh = gfs2_log_get_buf(sdp);
+ ld = (struct gfs2_log_descriptor *)bh->b_data;
+ ptr = (__be64 *)(bh->b_data + offset);
+ ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
+ ld->ld_header.mh_type = cpu_to_be16(GFS2_METATYPE_LD);
+ ld->ld_header.mh_format = cpu_to_be16(GFS2_FORMAT_LD);
+ ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_JDATA);
+ ld->ld_length = cpu_to_be32(num + 1);
+ ld->ld_data1 = cpu_to_be32(num);
+ ld->ld_data2 = cpu_to_be32(0);
+ memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
+ }
+ magic = gfs2_check_magic(bd1->bd_bh);
+ *ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
+ *ptr++ = cpu_to_be64((__u64)magic);
+ clear_buffer_escaped(bd1->bd_bh);
+ if (unlikely(magic != 0))
+ set_buffer_escaped(bd1->bd_bh);
+ if (n++ > num)
+ break;
+ }
+ }
if (bh) {
- get_bh(bh);
- gfs2_log_unlock(sdp);
- if (buffer_dirty(bh)) {
- wait_on_buffer(bh);
- ll_rw_block(WRITE, 1, &bh);
+ set_buffer_dirty(bh);
+ ll_rw_block(WRITE, 1, &bh);
+ bh = NULL;
+ }
+ n = 0;
+ /* printk(KERN_INFO "totals2: jdata=%u dbuf=%u\n", total_jdata, total_dbuf); */
+ list_for_each_entry_continue(bd2, &sdp->sd_log_le_databuf, bd_le.le_list) {
+ if (!bd2->bd_bh)
+ continue;
+ /* copy buffer if it needs escaping */
+ if (unlikely(buffer_escaped(bd2->bd_bh))) {
+ void *kaddr;
+ struct page *page = bd2->bd_bh->b_page;
+ bh = gfs2_log_get_buf(sdp);
+ kaddr = kmap_atomic(page, KM_USER0);
+ memcpy(bh->b_data, kaddr + bh_offset(bd2->bd_bh), sdp->sd_sb.sb_bsize);
+ kunmap_atomic(page, KM_USER0);
+ *(__be32 *)bh->b_data = 0;
+ } else {
+ bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
}
- brelse(bh);
- } else
- gfs2_log_unlock(sdp);
+ set_buffer_dirty(bh);
+ ll_rw_block(WRITE, 1, &bh);
+ if (++n >= num)
+ break;
+ }
+ bh = NULL;
+ total_dbuf -= num;
+ total_jdata -= num;
}
-
+ /* printk(KERN_INFO "wait on ordered data buffers\n"); */
+ /* Wait on all ordered buffers */
while (!list_empty(&started)) {
- bd = list_entry(started.next, struct gfs2_bufdata,
- bd_le.le_list);
- list_del(&bd->bd_le.le_list);
+ bd1 = list_entry(started.next, struct gfs2_bufdata, bd_le.le_list);
+ list_del(&bd1->bd_le.le_list);
sdp->sd_log_num_databuf--;
gfs2_log_lock(sdp);
- bh = bd->bd_bh;
+ bh = bd1->bd_bh;
if (bh) {
set_v2bd(bh, NULL);
gfs2_log_unlock(sdp);
@@ -479,12 +618,103 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
} else
gfs2_log_unlock(sdp);
- kfree(bd);
+ kfree(bd1);
}
+ /* printk(KERN_INFO "sd_log_num_databuf %u sd_log_num_jdata %u\n", sdp->sd_log_num_databuf, sdp->sd_log_num_jdata); */
+ /* We've removed all the ordered write bufs here, so only jdata left */
+ gfs2_assert_warn(sdp, sdp->sd_log_num_databuf == sdp->sd_log_num_jdata);
+}
+
+static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
+ struct gfs2_log_descriptor *ld,
+ __be64 *ptr, int pass)
+{
+ struct gfs2_sbd *sdp = jd->jd_inode->i_sbd;
+ struct gfs2_glock *gl = jd->jd_inode->i_gl;
+ unsigned int blks = be32_to_cpu(ld->ld_data1);
+ struct buffer_head *bh_log, *bh_ip;
+ uint64_t blkno;
+ uint64_t esc;
+ int error = 0;
+
+ if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA)
+ return 0;
+
+ gfs2_replay_incr_blk(sdp, &start);
+ for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
+ blkno = be64_to_cpu(*ptr++);
+ esc = be64_to_cpu(*ptr++);
+
+ sdp->sd_found_blocks++;
+
+ if (gfs2_revoke_check(sdp, blkno, start))
+ continue;
+
+ error = gfs2_replay_read_block(jd, start, &bh_log);
+ if (error)
+ return error;
+
+ bh_ip = gfs2_meta_new(gl, blkno);
+ memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);
+
+ /* Unescape */
+ if (esc) {
+ __be32 *eptr = (__be32 *)bh_ip->b_data;
+ *eptr = cpu_to_be32(GFS2_MAGIC);
+ }
+ mark_buffer_dirty(bh_ip);
+
+ brelse(bh_log);
+ brelse(bh_ip);
+ if (error)
+ break;
+
+ sdp->sd_replayed_blocks++;
+ }
+
+ return error;
+}
+
+/* FIXME: sort out accounting for log blocks etc. */
+
+static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
+{
+ struct gfs2_sbd *sdp = jd->jd_inode->i_sbd;
+
+ if (error) {
+ gfs2_meta_sync(jd->jd_inode->i_gl, DIO_START | DIO_WAIT);
+ return;
+ }
+ if (pass != 1)
+ return;
+
+ /* data sync? */
+ gfs2_meta_sync(jd->jd_inode->i_gl, DIO_START | DIO_WAIT);
+
+ fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n",
+ jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
+}
+
+static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
+{
+ struct list_head *head = &sdp->sd_log_le_databuf;
+ struct gfs2_bufdata *bd;
+
+ while (!list_empty(head)) {
+ bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
+ list_del_init(&bd->bd_le.le_list);
+ sdp->sd_log_num_databuf--;
+ sdp->sd_log_num_jdata--;
+ gfs2_unpin(sdp, bd->bd_bh, ai);
+ brelse(bd->bd_bh);
+ kfree(bd);
+ }
gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf);
+ gfs2_assert_warn(sdp, !sdp->sd_log_num_jdata);
}
+
struct gfs2_log_operations gfs2_glock_lops = {
.lo_add = glock_lo_add,
.lo_after_commit = glock_lo_after_commit,
@@ -519,7 +749,11 @@ struct gfs2_log_operations gfs2_rg_lops = {
struct gfs2_log_operations gfs2_databuf_lops = {
.lo_add = databuf_lo_add,
+ .lo_incore_commit = buf_lo_incore_commit,
.lo_before_commit = databuf_lo_before_commit,
+ .lo_after_commit = databuf_lo_after_commit,
+ .lo_scan_elements = databuf_lo_scan_elements,
+ .lo_after_scan = databuf_lo_after_scan,
.lo_name = "databuf"
};
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index b6bd2ebfc2cc..ef58d43b67ee 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -547,10 +547,12 @@ void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh, int meta
{
struct gfs2_bufdata *bd;
- lock_page(bh->b_page);
+ if (meta)
+ lock_page(bh->b_page);
if (get_v2bd(bh)) {
- unlock_page(bh->b_page);
+ if (meta)
+ unlock_page(bh->b_page);
return;
}
@@ -563,14 +565,16 @@ void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh, int meta
bd->bd_gl = gl;
INIT_LIST_HEAD(&bd->bd_list_tr);
- if (meta)
+ if (meta) {
lops_init_le(&bd->bd_le, &gfs2_buf_lops);
- else
+ } else {
lops_init_le(&bd->bd_le, &gfs2_databuf_lops);
-
+ get_bh(bh);
+ }
set_v2bd(bh, bd);
- unlock_page(bh->b_page);
+ if (meta)
+ unlock_page(bh->b_page);
}
/**
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index d611b2ad2e97..b14357e89421 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -20,13 +20,13 @@
#include "bmap.h"
#include "glock.h"
#include "inode.h"
-#include "jdata.h"
#include "log.h"
#include "meta_io.h"
#include "ops_address.h"
#include "page.h"
#include "quota.h"
#include "trans.h"
+#include "rgrp.h"
/**
* gfs2_get_block - Fills in a buffer head with details about a block
@@ -149,33 +149,55 @@ static int get_blocks_noalloc(struct inode *inode, sector_t lblock,
*
* Returns: errno
*
- * Use Linux VFS block_write_full_page() to write one page,
- * using GFS2's get_block_noalloc to find which blocks to write.
+ * Some of this is copied from block_write_full_page() although we still
+ * call it to do most of the work.
*/
static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
{
+ struct inode *inode = page->mapping->host;
struct gfs2_inode *ip = get_v2ip(page->mapping->host);
struct gfs2_sbd *sdp = ip->i_sbd;
+ loff_t i_size = i_size_read(inode);
+ pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
+ unsigned offset;
int error;
+ int done_trans = 0;
atomic_inc(&sdp->sd_ops_address);
-
if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) {
unlock_page(page);
return -EIO;
}
- if (get_transaction) {
- redirty_page_for_writepage(wbc, page);
+ if (get_transaction)
+ goto out_ignore;
+
+ /* Is the page fully outside i_size? (truncate in progress) */
+ offset = i_size & (PAGE_CACHE_SIZE-1);
+ if (page->index >= end_index+1 || !offset) {
+ page->mapping->a_ops->invalidatepage(page, 0);
unlock_page(page);
- return 0;
+ return 0; /* don't care */
}
- error = block_write_full_page(page, get_block_noalloc, wbc);
+ if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) {
+ error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0);
+ if (error)
+ goto out_ignore;
+ gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1);
+ done_trans = 1;
+ }
+ error = block_write_full_page(page, get_block_noalloc, wbc);
+ if (done_trans)
+ gfs2_trans_end(sdp);
gfs2_meta_cache_flush(ip);
-
return error;
+
+out_ignore:
+ redirty_page_for_writepage(wbc, page);
+ unlock_page(page);
+ return 0;
}
/**
@@ -227,40 +249,9 @@ static int zero_readpage(struct page *page)
}
/**
- * jdata_readpage - readpage that goes through gfs2_jdata_read_mem()
- * @ip:
- * @page: The page to read
- *
- * Returns: errno
- */
-
-static int jdata_readpage(struct gfs2_inode *ip, struct page *page)
-{
- void *kaddr;
- int ret;
-
- kaddr = kmap(page);
-
- ret = gfs2_jdata_read_mem(ip, kaddr,
- (uint64_t)page->index << PAGE_CACHE_SHIFT,
- PAGE_CACHE_SIZE);
- if (ret >= 0) {
- if (ret < PAGE_CACHE_SIZE)
- memset(kaddr + ret, 0, PAGE_CACHE_SIZE - ret);
- SetPageUptodate(page);
- ret = 0;
- }
-
- kunmap(page);
-
- unlock_page(page);
-
- return ret;
-}
-
-/**
* gfs2_readpage - readpage with locking
- * @file: The file to read a page for
+ * @file: The file to read a page for. N.B. This may be NULL if we are
+ * reading an internal file.
* @page: The page to read
*
* Returns: errno
@@ -270,31 +261,35 @@ static int gfs2_readpage(struct file *file, struct page *page)
{
struct gfs2_inode *ip = get_v2ip(page->mapping->host);
struct gfs2_sbd *sdp = ip->i_sbd;
+ struct gfs2_holder gh;
int error;
atomic_inc(&sdp->sd_ops_address);
- if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl))) {
- unlock_page(page);
- return -EOPNOTSUPP;
- }
+ gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
+ error = gfs2_glock_nq_m_atime(1, &gh);
+ if (error)
+ goto out_unlock;
- if (!gfs2_is_jdata(ip)) {
- if (gfs2_is_stuffed(ip)) {
- if (!page->index) {
- error = stuffed_readpage(ip, page);
- unlock_page(page);
- } else
- error = zero_readpage(page);
+ if (gfs2_is_stuffed(ip)) {
+ if (!page->index) {
+ error = stuffed_readpage(ip, page);
+ unlock_page(page);
} else
- error = mpage_readpage(page, gfs2_get_block);
+ error = zero_readpage(page);
} else
- error = jdata_readpage(ip, page);
+ error = mpage_readpage(page, gfs2_get_block);
if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
error = -EIO;
+ gfs2_glock_dq_m(1, &gh);
+ gfs2_holder_uninit(&gh);
+out:
return error;
+out_unlock:
+ unlock_page(page);
+ goto out;
}
/**
@@ -312,28 +307,82 @@ static int gfs2_prepare_write(struct file *file, struct page *page,
{
struct gfs2_inode *ip = get_v2ip(page->mapping->host);
struct gfs2_sbd *sdp = ip->i_sbd;
+ unsigned int data_blocks, ind_blocks, rblocks;
+ int alloc_required;
int error = 0;
+ loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + from;
+ loff_t end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+ struct gfs2_alloc *al;
atomic_inc(&sdp->sd_ops_address);
- if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)))
- return -EOPNOTSUPP;
+ gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &ip->i_gh);
+ error = gfs2_glock_nq_m_atime(1, &ip->i_gh);
+ if (error)
+ goto out_uninit;
- if (gfs2_is_stuffed(ip)) {
- uint64_t file_size;
- file_size = ((uint64_t)page->index << PAGE_CACHE_SHIFT) + to;
+ gfs2_write_calc_reserv(ip, to - from, &data_blocks, &ind_blocks);
+
+ error = gfs2_write_alloc_required(ip, pos, from - to, &alloc_required);
+ if (error)
+ goto out_unlock;
- if (file_size > sdp->sd_sb.sb_bsize -
- sizeof(struct gfs2_dinode)) {
- error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_page,
- page);
- if (!error)
- error = block_prepare_write(page, from, to,
- gfs2_get_block);
- } else if (!PageUptodate(page))
+
+ if (alloc_required) {
+ al = gfs2_alloc_get(ip);
+
+ error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
+ if (error)
+ goto out_alloc_put;
+
+ error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
+ if (error)
+ goto out_qunlock;
+
+ al->al_requested = data_blocks + ind_blocks;
+ error = gfs2_inplace_reserve(ip);
+ if (error)
+ goto out_qunlock;
+ }
+
+ rblocks = RES_DINODE + ind_blocks;
+ if (gfs2_is_jdata(ip))
+ rblocks += data_blocks ? data_blocks : 1;
+ if (ind_blocks || data_blocks)
+ rblocks += RES_STATFS + RES_QUOTA;
+
+ error = gfs2_trans_begin(sdp, rblocks, 0);
+ if (error)
+ goto out;
+
+ if (gfs2_is_stuffed(ip)) {
+ if (end > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) {
+ error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_page, page);
+ if (error)
+ goto out;
+ } else if (!PageUptodate(page)) {
error = stuffed_readpage(ip, page);
- } else
- error = block_prepare_write(page, from, to, gfs2_get_block);
+ goto out;
+ }
+ }
+
+ error = block_prepare_write(page, from, to, gfs2_get_block);
+
+out:
+ if (error) {
+ gfs2_trans_end(sdp);
+ if (alloc_required) {
+ gfs2_inplace_release(ip);
+out_qunlock:
+ gfs2_quota_unlock(ip);
+out_alloc_put:
+ gfs2_alloc_put(ip);
+ }
+out_unlock:
+ gfs2_glock_dq_m(1, &ip->i_gh);
+out_uninit:
+ gfs2_holder_uninit(&ip->i_gh);
+ }
return error;
}
@@ -354,48 +403,73 @@ static int gfs2_commit_write(struct file *file, struct page *page,
struct inode *inode = page->mapping->host;
struct gfs2_inode *ip = get_v2ip(inode);
struct gfs2_sbd *sdp = ip->i_sbd;
- int error;
+ int error = -EOPNOTSUPP;
+ struct buffer_head *dibh;
+ struct gfs2_alloc *al = &ip->i_alloc;;
atomic_inc(&sdp->sd_ops_address);
+
+ if (gfs2_assert_withdraw(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)))
+ goto fail_nounlock;
+
+ error = gfs2_meta_inode_buffer(ip, &dibh);
+ if (error)
+ goto fail_endtrans;
+
+ gfs2_trans_add_bh(ip->i_gl, dibh, 1);
+
if (gfs2_is_stuffed(ip)) {
- struct buffer_head *dibh;
uint64_t file_size;
void *kaddr;
file_size = ((uint64_t)page->index << PAGE_CACHE_SHIFT) + to;
- error = gfs2_meta_inode_buffer(ip, &dibh);
- if (error)
- goto fail;
-
- gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-
- kaddr = kmap(page);
+ kaddr = kmap_atomic(page, KM_USER0);
memcpy(dibh->b_data + sizeof(struct gfs2_dinode) + from,
- (char *)kaddr + from,
- to - from);
- kunmap(page);
-
- brelse(dibh);
+ (char *)kaddr + from, to - from);
+ kunmap_atomic(page, KM_USER0);
SetPageUptodate(page);
if (inode->i_size < file_size)
i_size_write(inode, file_size);
} else {
- if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED)
+ if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
gfs2_page_add_databufs(ip, page, from, to);
error = generic_commit_write(file, page, from, to);
if (error)
goto fail;
}
+ if (ip->i_di.di_size < inode->i_size)
+ ip->i_di.di_size = inode->i_size;
+
+ gfs2_dinode_out(&ip->i_di, dibh->b_data);
+ brelse(dibh);
+ gfs2_trans_end(sdp);
+ if (al->al_requested) {
+ gfs2_inplace_release(ip);
+ gfs2_quota_unlock(ip);
+ gfs2_alloc_put(ip);
+ }
+ gfs2_glock_dq_m(1, &ip->i_gh);
+ gfs2_holder_uninit(&ip->i_gh);
return 0;
- fail:
+fail:
+ brelse(dibh);
+fail_endtrans:
+ gfs2_trans_end(sdp);
+ if (al->al_requested) {
+ gfs2_inplace_release(ip);
+ gfs2_quota_unlock(ip);
+ gfs2_alloc_put(ip);
+ }
+ gfs2_glock_dq_m(1, &ip->i_gh);
+ gfs2_holder_uninit(&ip->i_gh);
+fail_nounlock:
ClearPageUptodate(page);
-
return error;
}
@@ -492,12 +566,16 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *io
atomic_inc(&sdp->sd_ops_address);
- if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)) ||
- gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip)))
+ if (gfs2_is_jdata(ip))
return -EINVAL;
- if (rw == WRITE && !get_transaction)
- gb = get_blocks_noalloc;
+ if (rw == WRITE) {
+ return -EOPNOTSUPP; /* for now */
+ } else {
+ if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)) ||
+ gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip)))
+ return -EINVAL;
+ }
return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
offset, nr_segs, gb, NULL);
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 0f356fc4690c..56820b39a993 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -18,6 +18,7 @@
#include <linux/mm.h>
#include <linux/smp_lock.h>
#include <linux/gfs2_ioctl.h>
+#include <linux/fs.h>
#include <asm/semaphore.h>
#include <asm/uaccess.h>
@@ -27,7 +28,6 @@
#include "glock.h"
#include "glops.h"
#include "inode.h"
-#include "jdata.h"
#include "lm.h"
#include "log.h"
#include "meta_io.h"
@@ -67,10 +67,37 @@ struct filldir_reg {
void *fdr_opaque;
};
-typedef ssize_t(*do_rw_t) (struct file *file,
- char __user *buf,
- size_t size, loff_t *offset,
- unsigned int num_gh, struct gfs2_holder *ghs);
+static int gfs2_read_actor(read_descriptor_t *desc, struct page *page,
+ unsigned long offset, unsigned long size)
+{
+ char *kaddr;
+ unsigned long count = desc->count;
+
+ if (size > count)
+ size = count;
+
+ kaddr = kmap(page);
+ memcpy(desc->arg.buf, kaddr + offset, size);
+ kunmap(page);
+
+ desc->count = count - size;
+ desc->written += size;
+ desc->arg.buf += size;
+ return size;
+}
+
+int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state,
+ char *buf, loff_t *pos, unsigned size)
+{
+ struct inode *inode = ip->i_vnode;
+ read_descriptor_t desc;
+ desc.written = 0;
+ desc.arg.buf = buf;
+ desc.count = size;
+ desc.error = 0;
+ do_generic_mapping_read(inode->i_mapping, ra_state, NULL, pos, &desc, gfs2_read_actor);
+ return desc.written ? desc.written : desc.error;
+}
/**
* gfs2_llseek - seek to a location in a file
@@ -105,247 +132,114 @@ static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin)
return error;
}
-static inline unsigned int vma2state(struct vm_area_struct *vma)
-{
- if ((vma->vm_flags & (VM_MAYWRITE | VM_MAYSHARE)) ==
- (VM_MAYWRITE | VM_MAYSHARE))
- return LM_ST_EXCLUSIVE;
- return LM_ST_SHARED;
-}
-static ssize_t walk_vm_hard(struct file *file, const char __user *buf, size_t size,
- loff_t *offset, do_rw_t operation)
+static ssize_t gfs2_direct_IO_read(struct kiocb *iocb, const struct iovec *iov,
+ loff_t offset, unsigned long nr_segs)
{
- struct gfs2_holder *ghs;
- unsigned int num_gh = 0;
- ssize_t count;
- struct super_block *sb = file->f_dentry->d_inode->i_sb;
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
- unsigned long start = (unsigned long)buf;
- unsigned long end = start + size;
- int dumping = (current->flags & PF_DUMPCORE);
- unsigned int x = 0;
-
- for (vma = find_vma(mm, start); vma; vma = vma->vm_next) {
- if (end <= vma->vm_start)
- break;
- if (vma->vm_file &&
- vma->vm_file->f_dentry->d_inode->i_sb == sb) {
- num_gh++;
- }
- }
-
- ghs = kcalloc((num_gh + 1), sizeof(struct gfs2_holder), GFP_KERNEL);
- if (!ghs) {
- if (!dumping)
- up_read(&mm->mmap_sem);
- return -ENOMEM;
- }
+ struct file *file = iocb->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+ ssize_t retval;
- for (vma = find_vma(mm, start); vma; vma = vma->vm_next) {
- if (end <= vma->vm_start)
- break;
- if (vma->vm_file) {
- struct inode *inode = vma->vm_file->f_dentry->d_inode;
- if (inode->i_sb == sb)
- gfs2_holder_init(get_v2ip(inode)->i_gl,
- vma2state(vma), 0, &ghs[x++]);
- }
+ retval = filemap_write_and_wait(mapping);
+ if (retval == 0) {
+ retval = mapping->a_ops->direct_IO(READ, iocb, iov, offset,
+ nr_segs);
}
-
- if (!dumping)
- up_read(&mm->mmap_sem);
-
- gfs2_assert(get_v2sdp(sb), x == num_gh);
-
- count = operation(file, buf, size, offset, num_gh, ghs);
-
- while (num_gh--)
- gfs2_holder_uninit(&ghs[num_gh]);
- kfree(ghs);
-
- return count;
+ return retval;
}
/**
- * walk_vm - Walk the vmas associated with a buffer for read or write.
- * If any of them are gfs2, pass the gfs2 inode down to the read/write
- * worker function so that locks can be acquired in the correct order.
- * @file: The file to read/write from/to
- * @buf: The buffer to copy to/from
- * @size: The amount of data requested
- * @offset: The current file offset
- * @operation: The read or write worker function
- *
- * Outputs: Offset - updated according to number of bytes written
- *
- * Returns: The number of bytes written, errno on failure
+ * __gfs2_file_aio_read - The main GFS2 read function
+ *
+ * N.B. This is almost, but not quite the same as __generic_file_aio_read()
+ * the important subtle difference being that inode->i_size isn't valid
+ * unless we are holding a lock, and we do this _only_ on the O_DIRECT
+ * path since otherwise locking is done entirely at the page cache
+ * layer.
*/
-
-static ssize_t walk_vm(struct file *file, const char __user *buf, size_t size,
- loff_t *offset, do_rw_t operation)
+static ssize_t __gfs2_file_aio_read(struct kiocb *iocb,
+ const struct iovec *iov,
+ unsigned long nr_segs, loff_t *ppos)
{
+ struct file *filp = iocb->ki_filp;
+ struct gfs2_inode *ip = get_v2ip(filp->f_mapping->host);
struct gfs2_holder gh;
-
- if (current->mm) {
- struct super_block *sb = file->f_dentry->d_inode->i_sb;
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
- unsigned long start = (unsigned long)buf;
- unsigned long end = start + size;
- int dumping = (current->flags & PF_DUMPCORE);
-
- if (!dumping)
- down_read(&mm->mmap_sem);
-
- for (vma = find_vma(mm, start); vma; vma = vma->vm_next) {
- if (end <= vma->vm_start)
- break;
- if (vma->vm_file &&
- vma->vm_file->f_dentry->d_inode->i_sb == sb)
- goto do_locks;
- }
-
- if (!dumping)
- up_read(&mm->mmap_sem);
- }
-
- return operation(file, buf, size, offset, 0, &gh);
-
-do_locks:
- return walk_vm_hard(file, buf, size, offset, operation);
-}
-
-static ssize_t do_jdata_read(struct file *file, char __user *buf, size_t size,
- loff_t *offset)
-{
- struct gfs2_inode *ip = get_v2ip(file->f_mapping->host);
- ssize_t count = 0;
-
- if (*offset < 0)
+ ssize_t retval;
+ unsigned long seg;
+ size_t count;
+
+ count = 0;
+ for (seg = 0; seg < nr_segs; seg++) {
+ const struct iovec *iv = &iov[seg];
+
+ /*
+ * If any segment has a negative length, or the cumulative
+ * length ever wraps negative then return -EINVAL.
+ */
+ count += iv->iov_len;
+ if (unlikely((ssize_t)(count|iv->iov_len) < 0))
return -EINVAL;
- if (!access_ok(VERIFY_WRITE, buf, size))
+ if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
+ continue;
+ if (seg == 0)
return -EFAULT;
+ nr_segs = seg;
+ count -= iv->iov_len; /* This segment is no good */
+ break;
+ }
+
+ /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
+ if (filp->f_flags & O_DIRECT) {
+ loff_t pos = *ppos, size;
+ struct address_space *mapping;
+ struct inode *inode;
+
+ mapping = filp->f_mapping;
+ inode = mapping->host;
+ retval = 0;
+ if (!count)
+ goto out; /* skip atime */
+
+ gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
+ retval = gfs2_glock_nq_m_atime(1, &gh);
+ if (retval)
+ goto out;
- if (!(file->f_flags & O_LARGEFILE)) {
- if (*offset >= MAX_NON_LFS)
- return -EFBIG;
- if (*offset + size > MAX_NON_LFS)
- size = MAX_NON_LFS - *offset;
- }
-
- count = gfs2_jdata_read(ip, buf, *offset, size, gfs2_copy2user);
-
- if (count > 0)
- *offset += count;
-
- return count;
-}
-
-/**
- * do_read_direct - Read bytes from a file
- * @file: The file to read from
- * @buf: The buffer to copy into
- * @size: The amount of data requested
- * @offset: The current file offset
- * @num_gh: The number of other locks we need to do the read
- * @ghs: the locks we need plus one for our lock
- *
- * Outputs: Offset - updated according to number of bytes read
- *
- * Returns: The number of bytes read, errno on failure
- */
-
-static ssize_t do_read_direct(struct file *file, char __user *buf, size_t size,
- loff_t *offset, unsigned int num_gh,
- struct gfs2_holder *ghs)
-{
- struct inode *inode = file->f_mapping->host;
- struct gfs2_inode *ip = get_v2ip(inode);
- unsigned int state = LM_ST_DEFERRED;
- int flags = 0;
- unsigned int x;
- ssize_t count = 0;
- int error;
-
- for (x = 0; x < num_gh; x++)
- if (ghs[x].gh_gl == ip->i_gl) {
- state = LM_ST_SHARED;
- flags |= GL_LOCAL_EXCL;
- break;
+ size = i_size_read(inode);
+ if (pos < size) {
+ retval = gfs2_direct_IO_read(iocb, iov, pos, nr_segs);
+ if (retval > 0 && !is_sync_kiocb(iocb))
+ retval = -EIOCBQUEUED;
+ if (retval > 0)
+ *ppos = pos + retval;
}
-
- gfs2_holder_init(ip->i_gl, state, flags, &ghs[num_gh]);
-
- error = gfs2_glock_nq_m(num_gh + 1, ghs);
- if (error)
+ file_accessed(filp);
+ gfs2_glock_dq_m(1, &gh);
+ gfs2_holder_uninit(&gh);
goto out;
+ }
- error = -EINVAL;
- if (gfs2_is_jdata(ip))
- goto out_gunlock;
-
- if (gfs2_is_stuffed(ip)) {
- size_t mask = bdev_hardsect_size(inode->i_sb->s_bdev) - 1;
-
- if (((*offset) & mask) || (((unsigned long)buf) & mask))
- goto out_gunlock;
-
- count = do_jdata_read(file, buf, size & ~mask, offset);
- } else
- count = generic_file_read(file, buf, size, offset);
-
- error = 0;
-
- out_gunlock:
- gfs2_glock_dq_m(num_gh + 1, ghs);
-
- out:
- gfs2_holder_uninit(&ghs[num_gh]);
-
- return (count) ? count : error;
-}
-
-/**
- * do_read_buf - Read bytes from a file
- * @file: The file to read from
- * @buf: The buffer to copy into
- * @size: The amount of data requested
- * @offset: The current file offset
- * @num_gh: The number of other locks we need to do the read
- * @ghs: the locks we need plus one for our lock
- *
- * Outputs: Offset - updated according to number of bytes read
- *
- * Returns: The number of bytes read, errno on failure
- */
-
-static ssize_t do_read_buf(struct file *file, char __user *buf, size_t size,
- loff_t *offset, unsigned int num_gh,
- struct gfs2_holder *ghs)
-{
- struct gfs2_inode *ip = get_v2ip(file->f_mapping->host);
- ssize_t count = 0;
- int error;
-
- gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &ghs[num_gh]);
-
- error = gfs2_glock_nq_m_atime(num_gh + 1, ghs);
- if (error)
- goto out;
-
- if (gfs2_is_jdata(ip))
- count = do_jdata_read(file, buf, size, offset);
- else
- count = generic_file_read(file, buf, size, offset);
-
- gfs2_glock_dq_m(num_gh + 1, ghs);
-
- out:
- gfs2_holder_uninit(&ghs[num_gh]);
-
- return (count) ? count : error;
+ retval = 0;
+ if (count) {
+ for (seg = 0; seg < nr_segs; seg++) {
+ read_descriptor_t desc;
+
+ desc.written = 0;
+ desc.arg.buf = iov[seg].iov_base;
+ desc.count = iov[seg].iov_len;
+ if (desc.count == 0)
+ continue;
+ desc.error = 0;
+ do_generic_file_read(filp,ppos,&desc,file_read_actor);
+ retval += desc.written;
+ if (desc.error) {
+ retval = retval ?: desc.error;
+ break;
+ }
+ }
+ }
+out:
+ return retval;
}
/**
@@ -360,550 +254,49 @@ static ssize_t do_read_buf(struct file *file, char __user *buf, size_t size,
* Returns: The number of bytes read, errno on failure
*/
-static ssize_t gfs2_read(struct file *file, char __user *buf, size_t size,
+static ssize_t gfs2_read(struct file *filp, char __user *buf, size_t size,
loff_t *offset)
{
- atomic_inc(&get_v2sdp(file->f_mapping->host->i_sb)->sd_ops_file);
-
- if (file->f_flags & O_DIRECT)
- return walk_vm(file, buf, size, offset, do_read_direct);
- else
- return walk_vm(file, buf, size, offset, do_read_buf);
-}
-
-/**
- * grope_mapping - feel up a mapping that needs to be written
- * @buf: the start of the memory to be written
- * @size: the size of the memory to be written
- *
- * We do this after acquiring the locks on the mapping,
- * but before starting the write transaction. We need to make
- * sure that we don't cause recursive transactions if blocks
- * need to be allocated to the file backing the mapping.
- *
- * Returns: errno
- */
-
-static int grope_mapping(const char __user *buf, size_t size)
-{
- const char __user *stop = buf + size;
- char c;
-
- while (buf < stop) {
- if (copy_from_user(&c, buf, 1))
- return -EFAULT;
- buf += PAGE_CACHE_SIZE;
- buf = (const char __user *)PAGE_ALIGN((unsigned long)buf);
- }
-
- return 0;
-}
-
-/**
- * do_write_direct_alloc - Write bytes to a file
- * @file: The file to write to
- * @buf: The buffer to copy from
- * @size: The amount of data requested
- * @offset: The current file offset
- *
- * Outputs: Offset - updated according to number of bytes written
- *
- * Returns: The number of bytes written, errno on failure
- */
-
-static ssize_t do_write_direct_alloc(struct file *file, const char __user *buf, size_t size,
- loff_t *offset)
-{
- struct inode *inode = file->f_mapping->host;
- struct gfs2_inode *ip = get_v2ip(inode);
- struct gfs2_sbd *sdp = ip->i_sbd;
- struct gfs2_alloc *al = NULL;
struct iovec local_iov = { .iov_base = buf, .iov_len = size };
- struct buffer_head *dibh;
- unsigned int data_blocks, ind_blocks;
- ssize_t count;
- int error;
-
- gfs2_write_calc_reserv(ip, size, &data_blocks, &ind_blocks);
-
- al = gfs2_alloc_get(ip);
-
- error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
- if (error)
- goto fail;
-
- error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
- if (error)
- goto fail_gunlock_q;
-
- al->al_requested = data_blocks + ind_blocks;
-
- error = gfs2_inplace_reserve(ip);
- if (error)
- goto fail_gunlock_q;
-
- error = gfs2_trans_begin(sdp,
- al->al_rgd->rd_ri.ri_length + ind_blocks +
- RES_DINODE + RES_STATFS + RES_QUOTA, 0);
- if (error)
- goto fail_ipres;
-
- if ((ip->i_di.di_mode & (S_ISUID | S_ISGID)) && !capable(CAP_FSETID)) {
- error = gfs2_meta_inode_buffer(ip, &dibh);
- if (error)
- goto fail_end_trans;
-
- ip->i_di.di_mode &= (ip->i_di.di_mode & S_IXGRP) ?
- (~(S_ISUID | S_ISGID)) : (~S_ISUID);
-
- gfs2_trans_add_bh(ip->i_gl, dibh, 1);
- gfs2_dinode_out(&ip->i_di, dibh->b_data);
- brelse(dibh);
- }
-
- if (gfs2_is_stuffed(ip)) {
- error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_sync, NULL);
- if (error)
- goto fail_end_trans;
- }
-
- count = generic_file_write_nolock(file, &local_iov, 1, offset);
- if (count < 0) {
- error = count;
- goto fail_end_trans;
- }
-
- error = gfs2_meta_inode_buffer(ip, &dibh);
- if (error)
- goto fail_end_trans;
-
- if (ip->i_di.di_size < inode->i_size)
- ip->i_di.di_size = inode->i_size;
- ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
-
- gfs2_trans_add_bh(ip->i_gl, dibh, 1);
- gfs2_dinode_out(&ip->i_di, dibh->b_data);
- brelse(dibh);
-
- gfs2_trans_end(sdp);
+ struct kiocb kiocb;
+ ssize_t ret;
- if (file->f_flags & O_SYNC)
- gfs2_log_flush_glock(ip->i_gl);
-
- gfs2_inplace_release(ip);
- gfs2_quota_unlock(ip);
- gfs2_alloc_put(ip);
-
- if (file->f_mapping->nrpages) {
- error = filemap_fdatawrite(file->f_mapping);
- if (!error)
- error = filemap_fdatawait(file->f_mapping);
- }
- if (error)
- return error;
-
- return count;
-
- fail_end_trans:
- gfs2_trans_end(sdp);
-
- fail_ipres:
- gfs2_inplace_release(ip);
-
- fail_gunlock_q:
- gfs2_quota_unlock(ip);
-
- fail:
- gfs2_alloc_put(ip);
+ atomic_inc(&get_v2sdp(filp->f_mapping->host->i_sb)->sd_ops_file);
- return error;
-}
-
-/**
- * do_write_direct - Write bytes to a file
- * @file: The file to write to
- * @buf: The buffer to copy from
- * @size: The amount of data requested
- * @offset: The current file offset
- * @num_gh: The number of other locks we need to do the read
- * @gh: the locks we need plus one for our lock
- *
- * Outputs: Offset - updated according to number of bytes written
- *
- * Returns: The number of bytes written, errno on failure
- */
-
-static ssize_t do_write_direct(struct file *file, const char __user *buf, size_t size,
- loff_t *offset, unsigned int num_gh,
- struct gfs2_holder *ghs)
-{
- struct gfs2_inode *ip = get_v2ip(file->f_mapping->host);
- struct gfs2_sbd *sdp = ip->i_sbd;
- struct gfs2_file *fp = get_v2fp(file);
- unsigned int state = LM_ST_DEFERRED;
- int alloc_required;
- unsigned int x;
- size_t s;
- ssize_t count = 0;
- int error;
-
- if (test_bit(GFF_DID_DIRECT_ALLOC, &fp->f_flags))
- state = LM_ST_EXCLUSIVE;
- else
- for (x = 0; x < num_gh; x++)
- if (ghs[x].gh_gl == ip->i_gl) {
- state = LM_ST_EXCLUSIVE;
- break;
- }
-
- restart:
- gfs2_holder_init(ip->i_gl, state, 0, &ghs[num_gh]);
-
- error = gfs2_glock_nq_m(num_gh + 1, ghs);
- if (error)
- goto out;
-
- error = -EINVAL;
- if (gfs2_is_jdata(ip))
- goto out_gunlock;
-
- if (num_gh) {
- error = grope_mapping(buf, size);
- if (error)
- goto out_gunlock;
- }
-
- if (file->f_flags & O_APPEND)
- *offset = ip->i_di.di_size;
-
- if (!(file->f_flags & O_LARGEFILE)) {
- error = -EFBIG;
- if (*offset >= MAX_NON_LFS)
- goto out_gunlock;
- if (*offset + size > MAX_NON_LFS)
- size = MAX_NON_LFS - *offset;
- }
-
- if (gfs2_is_stuffed(ip) ||
- *offset + size > ip->i_di.di_size ||
- ((ip->i_di.di_mode & (S_ISUID | S_ISGID)) && !capable(CAP_FSETID)))
- alloc_required = 1;
- else {
- error = gfs2_write_alloc_required(ip, *offset, size,
- &alloc_required);
- if (error)
- goto out_gunlock;
- }
-
- if (alloc_required && state != LM_ST_EXCLUSIVE) {
- gfs2_glock_dq_m(num_gh + 1, ghs);
- gfs2_holder_uninit(&ghs[num_gh]);
- state = LM_ST_EXCLUSIVE;
- goto restart;
- }
-
- if (alloc_required) {
- set_bit(GFF_DID_DIRECT_ALLOC, &fp->f_flags);
-
- /* split large writes into smaller atomic transactions */
- while (size) {
- s = gfs2_tune_get(sdp, gt_max_atomic_write);
- if (s > size)
- s = size;
-
- error = do_write_direct_alloc(file, buf, s, offset);
- if (error < 0)
- goto out_gunlock;
-
- buf += error;
- size -= error;
- count += error;
- }
- } else {
- struct iovec local_iov = { .iov_base = buf, .iov_len = size };
- struct gfs2_holder t_gh;
-
- clear_bit(GFF_DID_DIRECT_ALLOC, &fp->f_flags);
-
- error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
- GL_NEVER_RECURSE, &t_gh);
- if (error)
- goto out_gunlock;
-
- count = generic_file_write_nolock(file, &local_iov, 1, offset);
-
- gfs2_glock_dq_uninit(&t_gh);
- }
-
- error = 0;
-
- out_gunlock:
- gfs2_glock_dq_m(num_gh + 1, ghs);
-
- out:
- gfs2_holder_uninit(&ghs[num_gh]);
-
- return (count) ? count : error;
+ init_sync_kiocb(&kiocb, filp);
+ ret = __gfs2_file_aio_read(&kiocb, &local_iov, 1, offset);
+ if (-EIOCBQUEUED == ret)
+ ret = wait_on_sync_kiocb(&kiocb);
+ return ret;
}
-/**
- * do_do_write_buf - Write bytes to a file
- * @file: The file to write to
- * @buf: The buffer to copy from
- * @size: The amount of data requested
- * @offset: The current file offset
- *
- * Outputs: Offset - updated according to number of bytes written
- *
- * Returns: The number of bytes written, errno on failure
- */
-
-static ssize_t do_do_write_buf(struct file *file, const char __user *buf, size_t size,
- loff_t *offset)
+static ssize_t gfs2_file_readv(struct file *filp, const struct iovec *iov,
+ unsigned long nr_segs, loff_t *ppos)
{
- struct inode *inode = file->f_mapping->host;
- struct gfs2_inode *ip = get_v2ip(inode);
- struct gfs2_sbd *sdp = ip->i_sbd;
- struct gfs2_alloc *al = NULL;
- struct buffer_head *dibh;
- unsigned int data_blocks, ind_blocks;
- int alloc_required, journaled;
- ssize_t count;
- int error;
-
- journaled = gfs2_is_jdata(ip);
-
- gfs2_write_calc_reserv(ip, size, &data_blocks, &ind_blocks);
-
- error = gfs2_write_alloc_required(ip, *offset, size, &alloc_required);
- if (error)
- return error;
-
- if (alloc_required) {
- al = gfs2_alloc_get(ip);
-
- error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
- if (error)
- goto fail;
-
- error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
- if (error)
- goto fail_gunlock_q;
-
- al->al_requested = data_blocks + ind_blocks;
-
- error = gfs2_inplace_reserve(ip);
- if (error)
- goto fail_gunlock_q;
-
- error = gfs2_trans_begin(sdp,
- al->al_rgd->rd_ri.ri_length +
- ind_blocks +
- ((journaled) ? data_blocks : 0) +
- RES_DINODE + RES_STATFS + RES_QUOTA,
- 0);
- if (error)
- goto fail_ipres;
- } else {
- error = gfs2_trans_begin(sdp,
- ((journaled) ? data_blocks : 0) +
- RES_DINODE,
- 0);
- if (error)
- goto fail_ipres;
- }
-
- if ((ip->i_di.di_mode & (S_ISUID | S_ISGID)) && !capable(CAP_FSETID)) {
- error = gfs2_meta_inode_buffer(ip, &dibh);
- if (error)
- goto fail_end_trans;
-
- ip->i_di.di_mode &= (ip->i_di.di_mode & S_IXGRP) ?
- (~(S_ISUID | S_ISGID)) : (~S_ISUID);
-
- gfs2_trans_add_bh(ip->i_gl, dibh, 1);
- gfs2_dinode_out(&ip->i_di, dibh->b_data);
- brelse(dibh);
- }
+ struct kiocb kiocb;
+ ssize_t ret;
- if (journaled) {
- count = gfs2_jdata_write(ip, buf, *offset, size,
- gfs2_copy_from_user);
- if (count < 0) {
- error = count;
- goto fail_end_trans;
- }
-
- *offset += count;
- } else {
- struct iovec local_iov = { .iov_base = buf, .iov_len = size };
-
- count = generic_file_write_nolock(file, &local_iov, 1, offset);
- if (count < 0) {
- error = count;
- goto fail_end_trans;
- }
-
- error = gfs2_meta_inode_buffer(ip, &dibh);
- if (error)
- goto fail_end_trans;
-
- if (ip->i_di.di_size < inode->i_size)
- ip->i_di.di_size = inode->i_size;
- ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
-
- gfs2_trans_add_bh(ip->i_gl, dibh, 1);
- gfs2_dinode_out(&ip->i_di, dibh->b_data);
- brelse(dibh);
- }
-
- gfs2_trans_end(sdp);
-
- if (file->f_flags & O_SYNC || IS_SYNC(inode)) {
- gfs2_log_flush_glock(ip->i_gl);
- error = filemap_fdatawrite(file->f_mapping);
- if (error == 0)
- error = filemap_fdatawait(file->f_mapping);
- if (error)
- goto fail_ipres;
- }
-
- if (alloc_required) {
- gfs2_assert_warn(sdp, count != size ||
- al->al_alloced);
- gfs2_inplace_release(ip);
- gfs2_quota_unlock(ip);
- gfs2_alloc_put(ip);
- }
-
- return count;
-
- fail_end_trans:
- gfs2_trans_end(sdp);
-
- fail_ipres:
- if (alloc_required)
- gfs2_inplace_release(ip);
-
- fail_gunlock_q:
- if (alloc_required)
- gfs2_quota_unlock(ip);
+ atomic_inc(&get_v2sdp(filp->f_mapping->host->i_sb)->sd_ops_file);
- fail:
- if (alloc_required)
- gfs2_alloc_put(ip);
-
- return error;
+ init_sync_kiocb(&kiocb, filp);
+ ret = __gfs2_file_aio_read(&kiocb, iov, nr_segs, ppos);
+ if (-EIOCBQUEUED == ret)
+ ret = wait_on_sync_kiocb(&kiocb);
+ return ret;
}
-/**
- * do_write_buf - Write bytes to a file
- * @file: The file to write to
- * @buf: The buffer to copy from
- * @size: The amount of data requested
- * @offset: The current file offset
- * @num_gh: The number of other locks we need to do the read
- * @gh: the locks we need plus one for our lock
- *
- * Outputs: Offset - updated according to number of bytes written
- *
- * Returns: The number of bytes written, errno on failure
- */
-
-static ssize_t do_write_buf(struct file *file, const char __user *buf, size_t size,
- loff_t *offset, unsigned int num_gh,
- struct gfs2_holder *ghs)
+static ssize_t gfs2_file_aio_read(struct kiocb *iocb, char __user *buf,
+ size_t count, loff_t pos)
{
- struct gfs2_inode *ip = get_v2ip(file->f_mapping->host);
- struct gfs2_sbd *sdp = ip->i_sbd;
- size_t s;
- ssize_t count = 0;
- int error;
-
- gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[num_gh]);
-
- error = gfs2_glock_nq_m(num_gh + 1, ghs);
- if (error)
- goto out;
-
- if (num_gh) {
- error = grope_mapping(buf, size);
- if (error)
- goto out_gunlock;
- }
-
- if (file->f_flags & O_APPEND)
- *offset = ip->i_di.di_size;
-
- if (!(file->f_flags & O_LARGEFILE)) {
- error = -EFBIG;
- if (*offset >= MAX_NON_LFS)
- goto out_gunlock;
- if (*offset + size > MAX_NON_LFS)
- size = MAX_NON_LFS - *offset;
- }
-
- /* split large writes into smaller atomic transactions */
- while (size) {
- s = gfs2_tune_get(sdp, gt_max_atomic_write);
- if (s > size)
- s = size;
-
- error = do_do_write_buf(file, buf, s, offset);
- if (error < 0)
- goto out_gunlock;
-
- buf += error;
- size -= error;
- count += error;
- }
-
- error = 0;
+ struct file *filp = iocb->ki_filp;
+ struct iovec local_iov = { .iov_base = buf, .iov_len = count };
- out_gunlock:
- gfs2_glock_dq_m(num_gh + 1, ghs);
+ atomic_inc(&get_v2sdp(filp->f_mapping->host->i_sb)->sd_ops_file);
- out:
- gfs2_holder_uninit(&ghs[num_gh]);
-
- return (count) ? count : error;
+ BUG_ON(iocb->ki_pos != pos);
+ return __gfs2_file_aio_read(iocb, &local_iov, 1, &iocb->ki_pos);
}
-/**
- * gfs2_write - Write bytes to a file
- * @file: The file to write to
- * @buf: The buffer to copy from
- * @size: The amount of data requested
- * @offset: The current file offset
- *
- * Outputs: Offset - updated according to number of bytes written
- *
- * Returns: The number of bytes written, errno on failure
- */
-
-static ssize_t gfs2_write(struct file *file, const char __user *buf,
- size_t size, loff_t *offset)
-{
- struct inode *inode = file->f_mapping->host;
- ssize_t count;
-
- atomic_inc(&get_v2sdp(inode->i_sb)->sd_ops_file);
-
- if (*offset < 0)
- return -EINVAL;
- if (!access_ok(VERIFY_READ, buf, size))
- return -EFAULT;
-
- mutex_lock(&inode->i_mutex);
- if (file->f_flags & O_DIRECT)
- count = walk_vm(file, buf, size, offset,
- do_write_direct);
- else
- count = walk_vm(file, buf, size, offset, do_write_buf);
- mutex_unlock(&inode->i_mutex);
-
- return count;
-}
/**
* filldir_reg_func - Report a directory entry to the caller of gfs2_dir_read()
@@ -1158,9 +551,6 @@ static int gfs2_ioctl_flags(struct gfs2_inode *ip, unsigned int cmd, unsigned lo
if (flags & (GFS2_DIF_JDATA|GFS2_DIF_DIRECTIO)) {
if (!S_ISREG(ip->i_di.di_mode))
goto out;
- /* FIXME: Would be nice not to require the following test */
- if ((flags & GFS2_DIF_JDATA) && ip->i_di.di_size)
- goto out;
}
if (flags & (GFS2_DIF_INHERIT_JDATA|GFS2_DIF_INHERIT_DIRECTIO)) {
if (!S_ISDIR(ip->i_di.di_mode))
@@ -1246,21 +636,14 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
return error;
}
- if (gfs2_is_jdata(ip)) {
- if (vma->vm_flags & VM_MAYSHARE)
- error = -EOPNOTSUPP;
- else
- vma->vm_ops = &gfs2_vm_ops_private;
- } else {
- /* This is VM_MAYWRITE instead of VM_WRITE because a call
- to mprotect() can turn on VM_WRITE later. */
-
- if ((vma->vm_flags & (VM_MAYSHARE | VM_MAYWRITE)) ==
- (VM_MAYSHARE | VM_MAYWRITE))
- vma->vm_ops = &gfs2_vm_ops_sharewrite;
- else
- vma->vm_ops = &gfs2_vm_ops_private;
- }
+ /* This is VM_MAYWRITE instead of VM_WRITE because a call
+ to mprotect() can turn on VM_WRITE later. */
+
+ if ((vma->vm_flags & (VM_MAYSHARE | VM_MAYWRITE)) ==
+ (VM_MAYSHARE | VM_MAYWRITE))
+ vma->vm_ops = &gfs2_vm_ops_sharewrite;
+ else
+ vma->vm_ops = &gfs2_vm_ops_private;
gfs2_glock_dq_uninit(&i_gh);
@@ -1313,13 +696,6 @@ static int gfs2_open(struct inode *inode, struct file *file)
if (ip->i_di.di_flags & GFS2_DIF_DIRECTIO)
file->f_flags |= O_DIRECT;
- /* Don't let the user open O_DIRECT on a jdata file */
-
- if ((file->f_flags & O_DIRECT) && gfs2_is_jdata(ip)) {
- error = -EINVAL;
- goto fail_gunlock;
- }
-
gfs2_glock_dq_uninit(&i_gh);
}
@@ -1446,29 +822,10 @@ static ssize_t gfs2_sendfile(struct file *in_file, loff_t *offset, size_t count,
read_actor_t actor, void *target)
{
struct gfs2_inode *ip = get_v2ip(in_file->f_mapping->host);
- struct gfs2_holder gh;
- ssize_t retval;
atomic_inc(&ip->i_sbd->sd_ops_file);
- gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
-
- retval = gfs2_glock_nq_atime(&gh);
- if (retval)
- goto out;
-
- if (gfs2_is_jdata(ip))
- retval = -EOPNOTSUPP;
- else
- retval = generic_file_sendfile(in_file, offset, count, actor,
- target);
-
- gfs2_glock_dq(&gh);
-
- out:
- gfs2_holder_uninit(&gh);
-
- return retval;
+ return generic_file_sendfile(in_file, offset, count, actor, target);
}
static int do_flock(struct file *file, int cmd, struct file_lock *fl)
@@ -1567,7 +924,11 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
struct file_operations gfs2_file_fops = {
.llseek = gfs2_llseek,
.read = gfs2_read,
- .write = gfs2_write,
+ .readv = gfs2_file_readv,
+ .aio_read = gfs2_file_aio_read,
+ .write = generic_file_write,
+ .writev = generic_file_writev,
+ .aio_write = generic_file_aio_write,
.ioctl = gfs2_ioctl,
.mmap = gfs2_mmap,
.open = gfs2_open,
diff --git a/fs/gfs2/ops_vm.c b/fs/gfs2/ops_vm.c
index a1b409ce75e1..8f77bb7896bd 100644
--- a/fs/gfs2/ops_vm.c
+++ b/fs/gfs2/ops_vm.c
@@ -155,9 +155,6 @@ static struct page *gfs2_sharewrite_nopage(struct vm_area_struct *area,
if (error)
return NULL;
- if (gfs2_is_jdata(ip))
- goto out;
-
set_bit(GIF_PAGED, &ip->i_flags);
set_bit(GIF_SW_PAGED, &ip->i_flags);
diff --git a/fs/gfs2/page.c b/fs/gfs2/page.c
index ea31bceac4f2..3542aa6b01c4 100644
--- a/fs/gfs2/page.c
+++ b/fs/gfs2/page.c
@@ -172,8 +172,8 @@ int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
map_bh(bh, inode->i_sb, block);
set_buffer_uptodate(bh);
- if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED)
- gfs2_trans_add_databuf(sdp, bh);
+ if ((sdp->sd_args.ar_data == GFS2_DATA_ORDERED) || gfs2_is_jdata(ip))
+ gfs2_trans_add_bh(ip->i_gl, bh, 0);
mark_buffer_dirty(bh);
if (release) {
@@ -245,8 +245,8 @@ int gfs2_block_truncate_page(struct address_space *mapping)
goto unlock;
}
- if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED/* || gfs2_is_jdata(ip)*/)
- gfs2_trans_add_databuf(sdp, bh);
+ if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
+ gfs2_trans_add_bh(ip->i_gl, bh, 0);
kaddr = kmap_atomic(page, KM_USER0);
memset(kaddr + offset, 0, length);
@@ -273,7 +273,7 @@ void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
end = start + bsize;
if (end <= from || start >= to)
continue;
- gfs2_trans_add_databuf(ip->i_sbd, bh);
+ gfs2_trans_add_bh(ip->i_gl, bh, 0);
}
}
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 69e8f4e92e57..138fdf559a9a 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -43,20 +43,22 @@
#include <linux/buffer_head.h>
#include <linux/tty.h>
#include <linux/sort.h>
+#include <linux/fs.h>
#include <asm/semaphore.h>
#include "gfs2.h"
#include "bmap.h"
#include "glock.h"
#include "glops.h"
-#include "jdata.h"
#include "log.h"
#include "meta_io.h"
#include "quota.h"
#include "rgrp.h"
#include "super.h"
#include "trans.h"
+#include "inode.h"
#include "ops_file.h"
+#include "ops_address.h"
#define QUOTA_USER 1
#define QUOTA_GROUP 0
@@ -561,6 +563,81 @@ static void do_qc(struct gfs2_quota_data *qd, int64_t change)
up(&sdp->sd_quota_mutex);
}
+/**
+ * gfs2_adjust_quota
+ *
+ * This function was mostly borrowed from gfs2_block_truncate_page which was
+ * in turn mostly borrowed from ext3
+ */
+static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
+ int64_t change, struct gfs2_quota_data *qd)
+{
+ struct inode *inode = gfs2_ip2v(ip);
+ struct address_space *mapping = inode->i_mapping;
+ unsigned long index = loc >> PAGE_CACHE_SHIFT;
+ unsigned offset = loc & (PAGE_CACHE_SHIFT - 1);
+ unsigned blocksize, iblock, pos;
+ struct buffer_head *bh;
+ struct page *page;
+ void *kaddr;
+ __be64 *ptr;
+ u64 value;
+ int err = -EIO;
+
+ page = grab_cache_page(mapping, index);
+ if (!page)
+ return -ENOMEM;
+
+ blocksize = inode->i_sb->s_blocksize;
+ iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
+
+ if (!page_has_buffers(page))
+ create_empty_buffers(page, blocksize, 0);
+
+ bh = page_buffers(page);
+ pos = blocksize;
+ while (offset >= pos) {
+ bh = bh->b_this_page;
+ iblock++;
+ pos += blocksize;
+ }
+
+ if (!buffer_mapped(bh)) {
+ gfs2_get_block(inode, iblock, bh, 1);
+ if (!buffer_mapped(bh))
+ goto unlock;
+ }
+
+ if (PageUptodate(page))
+ set_buffer_uptodate(bh);
+
+ if (!buffer_uptodate(bh)) {
+ ll_rw_block(READ, 1, &bh);
+ wait_on_buffer(bh);
+ if (!buffer_uptodate(bh))
+ goto unlock;
+ }
+
+ gfs2_trans_add_bh(ip->i_gl, bh, 0);
+
+ kaddr = kmap_atomic(page, KM_USER0);
+ ptr = (__be64 *)(kaddr + offset);
+ value = *ptr = cpu_to_be64(be64_to_cpu(*ptr) + change);
+ flush_dcache_page(page);
+ kunmap_atomic(kaddr, KM_USER0);
+ err = 0;
+ qd->qd_qb.qb_magic = cpu_to_be32(GFS2_MAGIC);
+#if 0
+ qd->qd_qb.qb_limit = cpu_to_be64(q.qu_limit);
+ qd->qd_qb.qb_warn = cpu_to_be64(q.qu_warn);
+#endif
+ qd->qd_qb.qb_value = cpu_to_be64(value);
+unlock:
+ unlock_page(page);
+ page_cache_release(page);
+ return err;
+}
+
static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
{
struct gfs2_sbd *sdp = (*qda)->qd_gl->gl_sbd;
@@ -635,43 +712,14 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
file_ra_state_init(&ra_state, ip->i_vnode->i_mapping);
for (x = 0; x < num_qd; x++) {
- char buf[sizeof(struct gfs2_quota)];
- struct gfs2_quota q;
-
qd = qda[x];
offset = qd2offset(qd);
-
- /* The quota file may not be a multiple of
- sizeof(struct gfs2_quota) bytes. */
- memset(buf, 0, sizeof(struct gfs2_quota));
-
- error = gfs2_internal_read(ip, &ra_state, buf, &offset,
- sizeof(struct gfs2_quota));
- if (error < 0)
+ error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync,
+ (struct gfs2_quota_data *)qd->qd_gl->gl_lvb);
+ if (error)
goto out_end_trans;
- gfs2_quota_in(&q, buf);
- q.qu_value += qda[x]->qd_change_sync;
- gfs2_quota_out(&q, buf);
-
- error = gfs2_jdata_write_mem(ip, buf, offset,
- sizeof(struct gfs2_quota));
- if (error < 0)
- goto out_end_trans;
- else if (error != sizeof(struct gfs2_quota)) {
- error = -EIO;
- goto out_end_trans;
- }
-
do_qc(qd, -qd->qd_change_sync);
-
- memset(&qd->qd_qb, 0, sizeof(struct gfs2_quota_lvb));
- qd->qd_qb.qb_magic = GFS2_MAGIC;
- qd->qd_qb.qb_limit = q.qu_limit;
- qd->qd_qb.qb_warn = q.qu_warn;
- qd->qd_qb.qb_value = q.qu_value;
-
- gfs2_quota_lvb_out(&qd->qd_qb, qd->qd_gl->gl_lvb);
}
error = 0;
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index b014591fa4a4..104e664fa182 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -154,14 +154,13 @@ void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta)
gfs2_attach_bufdata(gl, bh, meta);
bd = get_v2bd(bh);
}
-
lops_add(sdp, &bd->bd_le);
}
void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, uint64_t blkno)
{
struct gfs2_revoke *rv = kmalloc(sizeof(struct gfs2_revoke),
- GFP_KERNEL | __GFP_NOFAIL);
+ GFP_NOFS | __GFP_NOFAIL);
lops_init_le(&rv->rv_le, &gfs2_revoke_lops);
rv->rv_blkno = blkno;
lops_add(sdp, &rv->rv_le);
@@ -197,19 +196,3 @@ void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd)
lops_add(rgd->rd_sbd, &rgd->rd_le);
}
-void gfs2_trans_add_databuf(struct gfs2_sbd *sdp, struct buffer_head *bh)
-{
- struct gfs2_bufdata *bd;
-
- bd = get_v2bd(bh);
- if (!bd) {
- bd = kmalloc(sizeof(struct gfs2_bufdata),
- GFP_NOFS | __GFP_NOFAIL);
- lops_init_le(&bd->bd_le, &gfs2_databuf_lops);
- get_bh(bh);
- bd->bd_bh = bh;
- set_v2bd(bh, bd);
- lops_add(sdp, &bd->bd_le);
- }
-}
-
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h
index 5a7da1e853c9..f7f3e2a3d590 100644
--- a/fs/gfs2/trans.h
+++ b/fs/gfs2/trans.h
@@ -35,6 +35,5 @@ void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta);
void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, uint64_t blkno);
void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, uint64_t blkno);
void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd);
-void gfs2_trans_add_databuf(struct gfs2_sbd *sdp, struct buffer_head *bh);
#endif /* __TRANS_DOT_H__ */
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index ad49153c33d1..4fb1704aac10 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -50,6 +50,7 @@ int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
"GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
sdp->sd_fsname, assertion,
sdp->sd_fsname, function, file, line);
+ dump_stack();
return (me) ? -1 : -2;
}
@@ -75,6 +76,8 @@ int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
if (sdp->sd_args.ar_debug)
BUG();
+ else
+ dump_stack();
sdp->sd_last_warning = jiffies;